{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "b6a193ea-b95c-4c9a-bf17-5176ff6729d0",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "2a06bcb2-072e-4160-9dec-06e2ad94e82f",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>p_val</th>\n",
       "      <th>avg_log2FC</th>\n",
       "      <th>pct.1</th>\n",
       "      <th>pct.2</th>\n",
       "      <th>p_val_adj</th>\n",
       "      <th>cluster</th>\n",
       "      <th>gene</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>MZB1</td>\n",
       "      <td>1.728998e-253</td>\n",
       "      <td>2.631042</td>\n",
       "      <td>0.992</td>\n",
       "      <td>0.308</td>\n",
       "      <td>5.092245e-249</td>\n",
       "      <td>0</td>\n",
       "      <td>MZB1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>DERL3</td>\n",
       "      <td>4.216278e-247</td>\n",
       "      <td>2.756492</td>\n",
       "      <td>0.982</td>\n",
       "      <td>0.203</td>\n",
       "      <td>1.241778e-242</td>\n",
       "      <td>0</td>\n",
       "      <td>DERL3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>SLAMF7</td>\n",
       "      <td>4.610962e-237</td>\n",
       "      <td>2.792783</td>\n",
       "      <td>0.950</td>\n",
       "      <td>0.149</td>\n",
       "      <td>1.358020e-232</td>\n",
       "      <td>0</td>\n",
       "      <td>SLAMF7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>TNFRSF17</td>\n",
       "      <td>8.844840e-233</td>\n",
       "      <td>2.919221</td>\n",
       "      <td>0.876</td>\n",
       "      <td>0.116</td>\n",
       "      <td>2.604982e-228</td>\n",
       "      <td>0</td>\n",
       "      <td>TNFRSF17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>IFNG-AS1</td>\n",
       "      <td>5.101002e-209</td>\n",
       "      <td>2.554297</td>\n",
       "      <td>0.870</td>\n",
       "      <td>0.130</td>\n",
       "      <td>1.502347e-204</td>\n",
       "      <td>0</td>\n",
       "      <td>IFNG-AS1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>74553</th>\n",
       "      <td>TWIST1.40</td>\n",
       "      <td>9.732295e-03</td>\n",
       "      <td>-3.994191</td>\n",
       "      <td>0.031</td>\n",
       "      <td>0.141</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>44</td>\n",
       "      <td>TWIST1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>74554</th>\n",
       "      <td>IGKV2-26.3</td>\n",
       "      <td>9.850416e-03</td>\n",
       "      <td>2.172777</td>\n",
       "      <td>0.047</td>\n",
       "      <td>0.004</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>44</td>\n",
       "      <td>IGKV2-26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>74555</th>\n",
       "      <td>TM4SF1.35</td>\n",
       "      <td>9.925528e-03</td>\n",
       "      <td>-5.324712</td>\n",
       "      <td>0.000</td>\n",
       "      <td>0.105</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>44</td>\n",
       "      <td>TM4SF1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>74556</th>\n",
       "      <td>ADCY2.42</td>\n",
       "      <td>9.925528e-03</td>\n",
       "      <td>-4.398575</td>\n",
       "      <td>0.000</td>\n",
       "      <td>0.093</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>44</td>\n",
       "      <td>ADCY2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>74557</th>\n",
       "      <td>FABP4.37</td>\n",
       "      <td>9.954299e-03</td>\n",
       "      <td>-6.738506</td>\n",
       "      <td>0.031</td>\n",
       "      <td>0.143</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>44</td>\n",
       "      <td>FABP4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>74558 rows × 8 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       Unnamed: 0          p_val  avg_log2FC  pct.1  pct.2      p_val_adj  \\\n",
       "0            MZB1  1.728998e-253    2.631042  0.992  0.308  5.092245e-249   \n",
       "1           DERL3  4.216278e-247    2.756492  0.982  0.203  1.241778e-242   \n",
       "2          SLAMF7  4.610962e-237    2.792783  0.950  0.149  1.358020e-232   \n",
       "3        TNFRSF17  8.844840e-233    2.919221  0.876  0.116  2.604982e-228   \n",
       "4        IFNG-AS1  5.101002e-209    2.554297  0.870  0.130  1.502347e-204   \n",
       "...           ...            ...         ...    ...    ...            ...   \n",
       "74553   TWIST1.40   9.732295e-03   -3.994191  0.031  0.141   1.000000e+00   \n",
       "74554  IGKV2-26.3   9.850416e-03    2.172777  0.047  0.004   1.000000e+00   \n",
       "74555   TM4SF1.35   9.925528e-03   -5.324712  0.000  0.105   1.000000e+00   \n",
       "74556    ADCY2.42   9.925528e-03   -4.398575  0.000  0.093   1.000000e+00   \n",
       "74557    FABP4.37   9.954299e-03   -6.738506  0.031  0.143   1.000000e+00   \n",
       "\n",
       "       cluster      gene  \n",
       "0            0      MZB1  \n",
       "1            0     DERL3  \n",
       "2            0    SLAMF7  \n",
       "3            0  TNFRSF17  \n",
       "4            0  IFNG-AS1  \n",
       "...        ...       ...  \n",
       "74553       44    TWIST1  \n",
       "74554       44  IGKV2-26  \n",
       "74555       44    TM4SF1  \n",
       "74556       44     ADCY2  \n",
       "74557       44     FABP4  \n",
       "\n",
       "[74558 rows x 8 columns]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "path_3000 = \"../../2024paper/output/SB66_combined_markers_3000_ident1000_20250728.csv\"\n",
    "df1 = pd.read_csv(path_3000)\n",
    "df1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "c175e49e-8d3c-46ab-9444-740380a28aec",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "cell_marker_dict = {\"Naive CD4+ T\" : ['IL7R','CCR7'],\n",
    "              \"CD14+ Mono\" : [\"CD14\", \"LYZ\"],\n",
    "               \"Meomory CD4+\" : ['IL7R', 'S100A4'],\n",
    "               \"B\" : ['MS4A1'],\n",
    "               'CD8+ T' : ['CD8A'],\n",
    "               'FCGR3A+ Mono' : ['FCGR3A', 'MS4A7'],\n",
    "               'NK' : ['GNLY', 'NKG7'],\n",
    "               'DC' : ['RCER1A', 'CST3'],\n",
    "               'Platelet' : ['PPBP']\n",
    "              }"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "7e2ff749-feea-447b-9d0c-667f2d28e463",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "geneList = [\"IL7R\", \"CCR7\", \"CD14\",\"LYZ\", \"IL7R\", \"S100A4\", \"MS4A1\", \"CD8A\", \"FCGR3A\", \"MS4A7\", \"GNLY\", \"NKG7\", \"FCER1A\", \"CST3\", \"PPBP\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "aa1dac8d-fdcb-429b-8d2a-b194061e0c39",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>cluster</th>\n",
       "      <th>size</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>517</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>1649</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>1414</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>698</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>544</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>5</td>\n",
       "      <td>2507</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>6</td>\n",
       "      <td>1140</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>7</td>\n",
       "      <td>2536</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>8</td>\n",
       "      <td>624</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   cluster  size\n",
       "0        0   517\n",
       "1        1  1649\n",
       "2        2  1414\n",
       "3        3   698\n",
       "4        4   544\n",
       "5        5  2507\n",
       "6        6  1140\n",
       "7        7  2536\n",
       "8        8   624"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby('cluster', as_index=False).size()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "id": "c5a56dea-f3cd-4338-9c91-6fb9f14495cb",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "cluster_0 = df[df['cluster'] == 0]\n",
    "cluster_1 = df[df['cluster'] == 1]\n",
    "cluster_2 = df[df['cluster'] == 2]\n",
    "cluster_3 = df[df['cluster'] == 3]\n",
    "cluster_4 = df[df['cluster'] == 4]\n",
    "cluster_5 = df[df['cluster'] == 5]\n",
    "cluster_6 = df[df['cluster'] == 6]\n",
    "cluster_7 = df[df['cluster'] == 7]\n",
    "cluster_8 = df[df['cluster'] == 8]\n",
    "df_list = [cluster_0, cluster_1, cluster_2, cluster_3, cluster_4, cluster_5, cluster_6, cluster_7, cluster_8]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "2fc2a880-62b9-4239-9a6c-770d709ab30b",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>p_val</th>\n",
       "      <th>avg_log2FC</th>\n",
       "      <th>pct.1</th>\n",
       "      <th>pct.2</th>\n",
       "      <th>p_val_adj</th>\n",
       "      <th>cluster</th>\n",
       "      <th>gene</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.273332e-143</td>\n",
       "      <td>0.738706</td>\n",
       "      <td>1.000</td>\n",
       "      <td>0.991</td>\n",
       "      <td>1.746248e-139</td>\n",
       "      <td>0</td>\n",
       "      <td>RPS12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>6.817653e-143</td>\n",
       "      <td>0.693452</td>\n",
       "      <td>1.000</td>\n",
       "      <td>0.995</td>\n",
       "      <td>9.349729e-139</td>\n",
       "      <td>0</td>\n",
       "      <td>RPS6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4.661810e-141</td>\n",
       "      <td>0.737260</td>\n",
       "      <td>0.999</td>\n",
       "      <td>0.992</td>\n",
       "      <td>6.393206e-137</td>\n",
       "      <td>0</td>\n",
       "      <td>RPS27</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>8.158412e-138</td>\n",
       "      <td>0.626608</td>\n",
       "      <td>0.999</td>\n",
       "      <td>0.995</td>\n",
       "      <td>1.118845e-133</td>\n",
       "      <td>0</td>\n",
       "      <td>RPL32</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5.177478e-130</td>\n",
       "      <td>0.633696</td>\n",
       "      <td>1.000</td>\n",
       "      <td>0.994</td>\n",
       "      <td>7.100394e-126</td>\n",
       "      <td>0</td>\n",
       "      <td>RPS14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>512</th>\n",
       "      <td>9.642293e-03</td>\n",
       "      <td>0.630428</td>\n",
       "      <td>0.064</td>\n",
       "      <td>0.040</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>0</td>\n",
       "      <td>CTD-2336O2.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>513</th>\n",
       "      <td>9.651791e-03</td>\n",
       "      <td>1.583601</td>\n",
       "      <td>0.016</td>\n",
       "      <td>0.006</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>0</td>\n",
       "      <td>MTHFD1L</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>514</th>\n",
       "      <td>9.795989e-03</td>\n",
       "      <td>0.644169</td>\n",
       "      <td>0.029</td>\n",
       "      <td>0.054</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>0</td>\n",
       "      <td>RARS</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>515</th>\n",
       "      <td>9.854458e-03</td>\n",
       "      <td>0.705814</td>\n",
       "      <td>0.225</td>\n",
       "      <td>0.186</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>0</td>\n",
       "      <td>GIMAP2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>516</th>\n",
       "      <td>9.872103e-03</td>\n",
       "      <td>0.776262</td>\n",
       "      <td>0.073</td>\n",
       "      <td>0.048</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>0</td>\n",
       "      <td>TBRG4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>517 rows × 7 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "             p_val  avg_log2FC  pct.1  pct.2      p_val_adj  cluster  \\\n",
       "0    1.273332e-143    0.738706  1.000  0.991  1.746248e-139        0   \n",
       "1    6.817653e-143    0.693452  1.000  0.995  9.349729e-139        0   \n",
       "2    4.661810e-141    0.737260  0.999  0.992  6.393206e-137        0   \n",
       "3    8.158412e-138    0.626608  0.999  0.995  1.118845e-133        0   \n",
       "4    5.177478e-130    0.633696  1.000  0.994  7.100394e-126        0   \n",
       "..             ...         ...    ...    ...            ...      ...   \n",
       "512   9.642293e-03    0.630428  0.064  0.040   1.000000e+00        0   \n",
       "513   9.651791e-03    1.583601  0.016  0.006   1.000000e+00        0   \n",
       "514   9.795989e-03    0.644169  0.029  0.054   1.000000e+00        0   \n",
       "515   9.854458e-03    0.705814  0.225  0.186   1.000000e+00        0   \n",
       "516   9.872103e-03    0.776262  0.073  0.048   1.000000e+00        0   \n",
       "\n",
       "             gene  \n",
       "0           RPS12  \n",
       "1            RPS6  \n",
       "2           RPS27  \n",
       "3           RPL32  \n",
       "4           RPS14  \n",
       "..            ...  \n",
       "512  CTD-2336O2.1  \n",
       "513       MTHFD1L  \n",
       "514          RARS  \n",
       "515        GIMAP2  \n",
       "516         TBRG4  \n",
       "\n",
       "[517 rows x 7 columns]"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cluster_0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "id": "2b1eb6c1-300b-4252-adfc-ec6bc55bcfa7",
   "metadata": {},
   "outputs": [],
   "source": [
    "def marker_result(exit):\n",
    "    for x in exit:\n",
    "        for cell, markerList in cell_marker_dict.items():\n",
    "            if x in markerList:\n",
    "                print('marker:',x, '    cell type:', cell)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "id": "6eb14e62-1720-4d6b-b51f-e1dd69649c3e",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "marker: CCR7     cell type: Naive CD4+ T\n",
      "marker: IL7R     cell type: Naive CD4+ T\n",
      "marker: IL7R     cell type: Meomory CD4+\n"
     ]
    }
   ],
   "source": [
    "exit0 = [x for x in cluster_0['gene'].tolist() if x in geneList ]\n",
    "marker_result(exit0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "id": "294952ad-c985-4d88-9395-e0896942961a",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "marker: CD14     cell type: CD14+ Mono\n",
      "marker: CST3     cell type: DC\n",
      "marker: LYZ     cell type: CD14+ Mono\n",
      "marker: S100A4     cell type: Meomory CD4+\n",
      "marker: MS4A7     cell type: FCGR3A+ Mono\n"
     ]
    }
   ],
   "source": [
    "exit1 = [x for x in cluster_1['gene'].tolist() if x in geneList ]\n",
    "marker_result(exit1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "id": "7fe753fa-ae5a-4b17-9ce8-f00e6f8b56e2",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "marker: IL7R     cell type: Naive CD4+ T\n",
      "marker: IL7R     cell type: Meomory CD4+\n"
     ]
    }
   ],
   "source": [
    "exit2 = [x for x in cluster_2['gene'].tolist() if x in geneList ]\n",
    "marker_result(exit2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "id": "6d30e655-d582-49da-b3f8-8cfcef500f05",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "marker: MS4A1     cell type: B\n"
     ]
    }
   ],
   "source": [
    "exit3 = [x for x in cluster_3['gene'].tolist() if x in geneList ]\n",
    "marker_result(exit3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "id": "3b7ad83d-a904-4697-b0c9-718fc62f4493",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "marker: NKG7     cell type: NK\n",
      "marker: CD8A     cell type: CD8+ T\n",
      "marker: GNLY     cell type: NK\n",
      "marker: IL7R     cell type: Naive CD4+ T\n",
      "marker: IL7R     cell type: Meomory CD4+\n"
     ]
    }
   ],
   "source": [
    "exit4 = [x for x in cluster_4['gene'].tolist() if x in geneList ]\n",
    "marker_result(exit4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "id": "0cf283a7-118a-405a-82d2-e8e5baa7b63a",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "marker: MS4A7     cell type: FCGR3A+ Mono\n",
      "marker: FCGR3A     cell type: FCGR3A+ Mono\n",
      "marker: CST3     cell type: DC\n",
      "marker: S100A4     cell type: Meomory CD4+\n"
     ]
    }
   ],
   "source": [
    "exit5 = [x for x in cluster_5['gene'].tolist() if x in geneList ]\n",
    "marker_result(exit5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "id": "8929ca65-e04c-4c52-926c-7ced64d403f5",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "marker: GNLY     cell type: NK\n",
      "marker: NKG7     cell type: NK\n",
      "marker: FCGR3A     cell type: FCGR3A+ Mono\n"
     ]
    }
   ],
   "source": [
    "exit6 = [x for x in cluster_6['gene'].tolist() if x in geneList ]\n",
    "marker_result(exit6)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "id": "ae11047a-b707-4d27-b8a8-8a902a06a0ff",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "marker: CST3     cell type: DC\n",
      "marker: LYZ     cell type: CD14+ Mono\n"
     ]
    }
   ],
   "source": [
    "exit7 = [x for x in cluster_7['gene'].tolist() if x in geneList ]\n",
    "marker_result(exit7)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "id": "84745651-83fc-4448-9e2f-040a565feaf3",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "marker: PPBP     cell type: Platelet\n"
     ]
    }
   ],
   "source": [
    "exit8 = [x for x in cluster_8['gene'].tolist() if x in geneList ]\n",
    "marker_result(exit8)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "25777919-15f9-48b0-9a8b-042906f07eb6",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "id": "4fc1fcde-debd-46e3-af5d-91a7e042843f",
   "metadata": {},
   "outputs": [],
   "source": [
    "celltype_markers = {\n",
    "    # 造血干细胞及祖细胞\n",
    "    \"HSC\": [\"Lin-\", \"CD34+\", \"CD38-\", \"CD45RA-\", \"CD90+\"],\n",
    "    \"SPINK2+ HSPC\": [\"Lin-\", \"SPINK2+\", \"CD34+\"],\n",
    "    \"HSPC\": [\"Lin-\", \"SPINK2-\", \"CD34+\"],\n",
    "    \"GMP\": [\"Lin-\", \"CD34+\", \"CD38+\", \"CD90+\", \"CD45RA-\"],\n",
    "    \"GMP/Myeloblast\": [\"Lin-\", \"CD34+\", \"CD33+\"],\n",
    "    \"Early Myeloid Progenitor\": [\"CD33+\", \"MPO+\", \"CD11B-lo\"],\n",
    "    \"Intermediate Myeloid\": [\"CD33-mid\", \"MPO-mid\", \"CD11B+\"],\n",
    "    \"Mature Myeloid\": [\"CD33-lo\", \"MPO-lo\", \"CD11B+\", \"CD141+\"],\n",
    "    \n",
    "    # 髓系细胞\n",
    "    \"Monocytes\": [\"CD14+\"],\n",
    "    \"Non-Classical Monocyte\": [\"CD14-lo\", \"CD11C+\", \"HLA-DR++\"],\n",
    "    \"Macrophages\": [\"VCAM1+\", \"CD68+\", \"CD163+\"],\n",
    "    \"pDC\": [\"CD123+\", \"CD34-\"],\n",
    "    \n",
    "    # 淋系细胞\n",
    "    \"CLP\": [\"Lin-\", \"CD34+\", \"CD38-\", \"CD45RA+\"],\n",
    "    \"Immature_B_Cell\": [\"Pax5+\", \"CD79A+\", \"CD38-lo/mid\"],\n",
    "    \"B-Cells\": [\"Pax5-\", \"CD79A+\", \"CD38-lo/mid\"],\n",
    "    \"CD4+ T-Cell\": [\"CD3e+\", \"CD4+\"],\n",
    "    \"CD8+ T-Cell\": [\"CD3e+\", \"CD8+\"],\n",
    "    \"Plasma Cells\": [\"CD79A+\", \"CD38+++\", \"CD138+\"],\n",
    "    \n",
    "    # 红系/巨核系\n",
    "    \"MEP/Early Erythroblast\": [\"Lin-\", \"CD34+\", \"GATA1+\"],\n",
    "    \"CD34+ CD61+\": [\"Lin-\", \"CD34+\", \"CD61+\"],\n",
    "    \"Erythroblast\": [\"GATA1+\", \"CD71+\", \"GYPC+\"],\n",
    "    \"Erythroid\": [\"GYPC+\", \"CD71+\"],\n",
    "    \"GATA1neg_Mks\": [\"GATA1-\", \"CD61+\", \"TGFB1+\"],\n",
    "    \"GATA1pos_Mks\": [\"GATA1+\", \"CD61+\", \"TGFB1+\"],\n",
    "    \n",
    "    # 间充质/基质细胞\n",
    "    \"Adipo-MSC\": [\"FOXC1+\", \"CXCL12+\", \"CD90-lo\"],\n",
    "    \"THY1+ MSC\": [\"FOXC1+\", \"CXCL12+\", \"CD90-hi\"],\n",
    "    \"Adipocyte\": [\"CD146+\", \"CD138+\"],\n",
    "    \"Endosteal\": [\"CD56+\", \"VIM+\", \"cluster spatial location\"],\n",
    "    \"AEC\": [\"CXCL12+\", \"VE-Cadherin+\"],\n",
    "    \"SEC\": [\"VE-Cadherin+\", \"CD34+\", \"CXCL12-\"],\n",
    "    \"VSMC\": [\"ASMA+\", \"VE-Cadherin-\"],\n",
    "    \"Schwann Cell\": [\"PLP1+\", \"CD271+\"]\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "id": "9ca03c7a-10cd-4239-af56-9033ee8be83b",
   "metadata": {
    "collapsed": true,
    "jupyter": {
     "outputs_hidden": true
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['HSC',\n",
       " 'SPINK2+ HSPC',\n",
       " 'HSPC',\n",
       " 'GMP',\n",
       " 'GMP/Myeloblast',\n",
       " 'Early Myeloid Progenitor',\n",
       " 'Intermediate Myeloid',\n",
       " 'Mature Myeloid',\n",
       " 'Monocytes',\n",
       " 'Non-Classical Monocyte',\n",
       " 'Macrophages',\n",
       " 'pDC',\n",
       " 'CLP',\n",
       " 'Immature_B_Cell',\n",
       " 'B-Cells',\n",
       " 'CD4+ T-Cell',\n",
       " 'CD8+ T-Cell',\n",
       " 'Plasma Cells',\n",
       " 'MEP/Early Erythroblast',\n",
       " 'CD34+ CD61+',\n",
       " 'Erythroblast',\n",
       " 'Erythroid',\n",
       " 'GATA1neg_Mks',\n",
       " 'GATA1pos_Mks',\n",
       " 'Adipo-MSC',\n",
       " 'THY1+ MSC',\n",
       " 'Adipocyte',\n",
       " 'Endosteal',\n",
       " 'AEC',\n",
       " 'SEC',\n",
       " 'VSMC',\n",
       " 'Schwann Cell']"
      ]
     },
     "execution_count": 83,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "list(celltype_markers.keys())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "id": "4e270cf4-1e30-40f0-882c-fab3a19a4b24",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "32"
      ]
     },
     "execution_count": 86,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(list(celltype_markers.keys()))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "748176b7-e40e-40b7-b56f-9569557a1227",
   "metadata": {},
   "source": [
    "# cell paper data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "id": "723882df-005d-4f60-9e7d-dc6421432bbf",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>p_val</th>\n",
       "      <th>avg_log2FC</th>\n",
       "      <th>pct.1</th>\n",
       "      <th>pct.2</th>\n",
       "      <th>p_val_adj</th>\n",
       "      <th>cluster</th>\n",
       "      <th>gene</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>MZB1</td>\n",
       "      <td>1.728998e-253</td>\n",
       "      <td>2.631042</td>\n",
       "      <td>0.992</td>\n",
       "      <td>0.308</td>\n",
       "      <td>5.092245e-249</td>\n",
       "      <td>0</td>\n",
       "      <td>MZB1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>SEC11C</td>\n",
       "      <td>6.000225e-250</td>\n",
       "      <td>2.629544</td>\n",
       "      <td>0.989</td>\n",
       "      <td>0.373</td>\n",
       "      <td>1.767186e-245</td>\n",
       "      <td>0</td>\n",
       "      <td>SEC11C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>DERL3</td>\n",
       "      <td>4.216278e-247</td>\n",
       "      <td>2.756492</td>\n",
       "      <td>0.982</td>\n",
       "      <td>0.203</td>\n",
       "      <td>1.241778e-242</td>\n",
       "      <td>0</td>\n",
       "      <td>DERL3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>FKBP11</td>\n",
       "      <td>2.483511e-243</td>\n",
       "      <td>2.595957</td>\n",
       "      <td>0.984</td>\n",
       "      <td>0.346</td>\n",
       "      <td>7.314436e-239</td>\n",
       "      <td>0</td>\n",
       "      <td>FKBP11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>SSR4</td>\n",
       "      <td>4.589469e-241</td>\n",
       "      <td>2.497164</td>\n",
       "      <td>0.995</td>\n",
       "      <td>0.717</td>\n",
       "      <td>1.351690e-236</td>\n",
       "      <td>0</td>\n",
       "      <td>SSR4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>504629</th>\n",
       "      <td>TRIM46.17</td>\n",
       "      <td>9.913514e-03</td>\n",
       "      <td>1.792502</td>\n",
       "      <td>0.047</td>\n",
       "      <td>0.011</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>44</td>\n",
       "      <td>TRIM46</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>504630</th>\n",
       "      <td>TM4SF1.35</td>\n",
       "      <td>9.925528e-03</td>\n",
       "      <td>-5.324712</td>\n",
       "      <td>0.000</td>\n",
       "      <td>0.105</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>44</td>\n",
       "      <td>TM4SF1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>504631</th>\n",
       "      <td>ADCY2.42</td>\n",
       "      <td>9.925528e-03</td>\n",
       "      <td>-4.398575</td>\n",
       "      <td>0.000</td>\n",
       "      <td>0.093</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>44</td>\n",
       "      <td>ADCY2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>504632</th>\n",
       "      <td>VASH1.32</td>\n",
       "      <td>9.925528e-03</td>\n",
       "      <td>-2.592445</td>\n",
       "      <td>0.000</td>\n",
       "      <td>0.091</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>44</td>\n",
       "      <td>VASH1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>504633</th>\n",
       "      <td>FABP4.37</td>\n",
       "      <td>9.954299e-03</td>\n",
       "      <td>-6.738506</td>\n",
       "      <td>0.031</td>\n",
       "      <td>0.143</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>44</td>\n",
       "      <td>FABP4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>504634 rows × 8 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       Unnamed: 0          p_val  avg_log2FC  pct.1  pct.2      p_val_adj  \\\n",
       "0            MZB1  1.728998e-253    2.631042  0.992  0.308  5.092245e-249   \n",
       "1          SEC11C  6.000225e-250    2.629544  0.989  0.373  1.767186e-245   \n",
       "2           DERL3  4.216278e-247    2.756492  0.982  0.203  1.241778e-242   \n",
       "3          FKBP11  2.483511e-243    2.595957  0.984  0.346  7.314436e-239   \n",
       "4            SSR4  4.589469e-241    2.497164  0.995  0.717  1.351690e-236   \n",
       "...           ...            ...         ...    ...    ...            ...   \n",
       "504629  TRIM46.17   9.913514e-03    1.792502  0.047  0.011   1.000000e+00   \n",
       "504630  TM4SF1.35   9.925528e-03   -5.324712  0.000  0.105   1.000000e+00   \n",
       "504631   ADCY2.42   9.925528e-03   -4.398575  0.000  0.093   1.000000e+00   \n",
       "504632   VASH1.32   9.925528e-03   -2.592445  0.000  0.091   1.000000e+00   \n",
       "504633   FABP4.37   9.954299e-03   -6.738506  0.031  0.143   1.000000e+00   \n",
       "\n",
       "        cluster    gene  \n",
       "0             0    MZB1  \n",
       "1             0  SEC11C  \n",
       "2             0   DERL3  \n",
       "3             0  FKBP11  \n",
       "4             0    SSR4  \n",
       "...         ...     ...  \n",
       "504629       44  TRIM46  \n",
       "504630       44  TM4SF1  \n",
       "504631       44   ADCY2  \n",
       "504632       44   VASH1  \n",
       "504633       44   FABP4  \n",
       "\n",
       "[504634 rows x 8 columns]"
      ]
     },
     "execution_count": 68,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd \n",
    "path = \"./SB66_combined_markers_2000_ident1000_20250725.csv\"\n",
    "path_3000 = \"../output/SB66_combined_markers_3000_ident1000_20250728.csv\"\n",
    "path_all = \"../output/SB66_combined_markers_ident1000_20250728.csv\"\n",
    "path_our = \"../../ourData/output/SB66_combined_markers_ident1000_20250729_2.csv\"\n",
    "# path = \"/lustre/home/acct-medfzx/medfzx-lkw/project/bone/2024paper/output/SB66_combined_markers_2000_ident1000_20250725.csv\"\n",
    "df = pd.read_csv(path_all)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b22e55af-7157-4f85-883a-db9f683d5d7b",
   "metadata": {},
   "outputs": [],
   "source": [
    "markers_list <- list(\n",
    "  # 造血干细胞及祖细胞\n",
    "  HSC = c(\"Lin-\", \"CD34+\", \"CD38-\", \"CD45RA-\", \"CD90+\"),\n",
    "  SPINK2_HSPC = c(\"Lin-\", \"SPINK2+\", \"CD34+\"),\n",
    "  HSPC = c(\"Lin-\", \"SPINK2-\", \"CD34+\"),\n",
    "  GMP = c(\"Lin-\", \"CD34+\", \"CD38+\", \"CD90+\", \"CD45RA-\"),\n",
    "  GMP_Myeloblast = c(\"Lin-\", \"CD34+\", \"CD33+\"),\n",
    "  Early_Myeloid_Progenitor = c(\"CD33+\", \"MPO+\", \"CD11B-lo\"),\n",
    "  Intermediate_Myeloid = c(\"CD33-mid\", \"MPO-mid\", \"CD11B+\"),\n",
    "  Mature_Myeloid = c(\"CD33-lo\", \"MPO-lo\", \"CD11B+\", \"CD141+\"),\n",
    "  \n",
    "  # 髓系细胞    一大类免疫细胞，起源于骨髓中的​​共同髓系祖细胞（CMP）​​，参与先天免疫、炎症反应和组织稳态维持  \n",
    "  Monocytes = c(\"CD14+\"),   ## ​​经典单核细胞（CD14hiCD16-）​​：占90%，高吞噬活性，参与急性炎症反应   非经典单核细胞（CD14-loCD16+）​​：巡逻血管内皮，监测病原体并参与抗病毒反应\n",
    "  Non_Classical_Monocyte = c(\"CD14-lo\", \"CD11C+\", \"HLA-DR++\"),   # \n",
    "  Macrophages = c(\"VCAM1+\", \"CD68+\", \"CD163+\"),   # ​​组织特异性​​：如肝脏库普弗细胞、脑小胶质细胞等，均源自胚胎或单核细胞分化\n",
    "  pDC = c(\"CD123+\", \"CD34-\"),\n",
    "  \n",
    "  # 淋系细胞\n",
    "  CLP = c(\"Lin-\", \"CD34+\", \"CD38-\", \"CD45RA+\"),\n",
    "  Immature_B_Cell = c(\"Pax5+\", \"CD79A+\", \"CD38-lo/mid\"),\n",
    "  B_Cells = c(\"Pax5-\", \"CD79A+\", \"CD38-lo/mid\"),\n",
    "  CD4_T_Cell = c(\"CD3e+\", \"CD4+\"),\n",
    "  CD8_T_Cell = c(\"CD3e+\", \"CD8+\"),\n",
    "  Plasma_Cells = c(\"CD79A+\", \"CD38+++\", \"CD138+\"),\n",
    "  \n",
    "  # 红系/巨核系\n",
    "  MEP_Early_Erythroblast = c(\"Lin-\", \"CD34+\", \"GATA1+\"),\n",
    "  CD34_CD61 = c(\"Lin-\", \"CD34+\", \"CD61+\"),\n",
    "  Erythroblast = c(\"GATA1+\", \"CD71+\", \"GYPC+\"),\n",
    "  Erythroid = c(\"GYPC+\", \"CD71+\"),\n",
    "  GATA1neg_Mks = c(\"GATA1-\", \"CD61+\", \"TGFB1+\"),\n",
    "  GATA1pos_Mks = c(\"GATA1+\", \"CD61+\", \"TGFB1+\"),\n",
    "  \n",
    "  # 间充质/基质细胞\n",
    "  Adipo_MSC = c(\"FOXC1+\", \"CXCL12+\", \"CD90-lo\"),\n",
    "  THY1_MSC = c(\"FOXC1+\", \"CXCL12+\", \"CD90-hi\"),\n",
    "  Adipocyte = c(\"CD146+\", \"CD138+\"),\n",
    "  Endosteal = c(\"CD56+\", \"VIM+\", \"cluster spatial location\"),\n",
    "  AEC = c(\"CXCL12+\", \"VE-Cadherin+\"),\n",
    "  SEC = c(\"VE-Cadherin+\", \"CD34+\", \"CXCL12-\"),\n",
    "  VSMC = c(\"ASMA+\", \"VE-Cadherin-\"),\n",
    "  Schwann_Cell = c(\"PLP1+\", \"CD271+\")\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "21127758-68f2-42cd-862a-9f80f0cd1f02",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "markers_dict = {\n",
    "    # 造血干细胞及祖细胞\n",
    "    \"HSC\": [\"Lin-\", \"CD34+\", \"CD38-\", \"PTPRC-\", \"THY1+\"],\n",
    "    \"SPINK2_HSPC\": [\"Lin-\", \"SPINK2+\", \"CD34+\"],\n",
    "    \"HSPC\": [\"Lin-\", \"SPINK2-\", \"CD34+\"],\n",
    "    \"GMP\": [\"Lin-\", \"CD34+\", \"CD38+\", \"THY1+\", \"PTPRC-\"],\n",
    "    \"GMP_Myeloblast\": [\"Lin-\", \"CD34+\", \"CD33+\"],\n",
    "    \"Early_Myeloid_Progenitor\": [\"CD33+\", \"MPO+\", \"ITGAM-lo\"],\n",
    "    \"Intermediate_Myeloid\": [\"CD33-mid\", \"MPO-mid\", \"ITGAM+\"],\n",
    "    \"Mature_Myeloid\": [\"CD33-lo\", \"MPO-lo\", \"ITGAM+\", \"THBD+\"],\n",
    "    \n",
    "    # 髓系细胞\n",
    "    \"Monocytes\": [\"CD14+\"],\n",
    "    \"Non_Classical_Monocyte\": [\"CD14-lo\", \"ITGAX+\", \"HLA-DRA++\"],\n",
    "    \"Macrophages\": [\"VCAM1+\", \"CD68+\", \"CD163+\"],\n",
    "    \"pDC\": [\"IL3RA+\", \"CD34-\"],\n",
    "    \n",
    "    # 淋系细胞\n",
    "    \"CLP\": [\"Lin-\", \"CD34+\", \"CD38-\", \"PTPRC+\"],\n",
    "    \"Immature_B_Cell\": [\"PAX5+\", \"CD79A+\", \"CD38-lo/mid\"],\n",
    "    \"B_Cells\": [\"PAX5-\", \"CD79A+\", \"CD38-lo/mid\"],\n",
    "    \"CD4_T_Cell\": [\"CD3E+\", \"CD4+\"],\n",
    "    \"CD8_T_Cell\": [\"CD3E+\", \"CD8A+\"],\n",
    "    \"Plasma_Cells\": [\"CD79A+\", \"CD38+++\", \"SDC1+\"],\n",
    "    \n",
    "    # 红系/巨核系\n",
    "    \"MEP_Early_Erythroblast\": [\"Lin-\", \"CD34+\", \"GATA1+\"],\n",
    "    \"CD34_CD61\": [\"Lin-\", \"CD34+\", \"ITGB3+\"],\n",
    "    \"Erythroblast\": [\"GATA1+\", \"TFRC+\", \"GYPC+\"],\n",
    "    \"Erythroid\": [\"GYPC+\", \"TFRC+\"],\n",
    "    \"GATA1neg_Mks\": [\"GATA1-\", \"ITGB3+\", \"TGFB1+\"],\n",
    "    \"GATA1pos_Mks\": [\"GATA1+\", \"ITGB3+\", \"TGFB1+\"],\n",
    "    \n",
    "    # 间充质/基质细胞\n",
    "    \"Adipo_MSC\": [\"FOXC1+\", \"CXCL12+\", \"THY1-lo\"],\n",
    "    \"THY1_MSC\": [\"FOXC1+\", \"CXCL12+\", \"THY1-hi\"],\n",
    "    \"Adipocyte\": [\"MCAM+\", \"SDC1+\"],\n",
    "    \"Endosteal\": [\"NCAM1+\", \"VIM+\", \"cluster_spatial_location\"],\n",
    "    \"AEC\": [\"CXCL12+\", \"CDH5+\"],\n",
    "    \"SEC\": [\"CDH5+\", \"CD34+\", \"CXCL12-\"],\n",
    "    \"VSMC\": [\"ACTA2\", \"CDH5\"],\n",
    "    \"Schwann_Cell\": [\"PLP1+\", \"NGFR+\"]\n",
    "}\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "id": "daf6a9e0-2665-463d-b45a-87cd2b42273d",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# ■T 细胞：CD3\n",
    "# ■B 细胞：CD19 或 CD20\n",
    "# ■NK 细胞：CD56\n",
    "# ■单核细胞/巨噬细胞：CD14\n",
    "# ■粒细胞：CD66b\n",
    "\n",
    "lin_dict ={'lin' : [['CD3E', '-'], ['CD19', '-'], ['NCAM1', '-'], ['CD14', '-'], ['CEACAM8', '-']]}\n",
    "\n",
    "linList = ['CD3E', 'CD19', 'NCAM1', 'CD14', 'CEACAM8']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "b897334c-1387-4537-8b1c-6c4a07b6bc3a",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "markers_dict = {\n",
    "    \n",
    "    # 造血干细胞及祖细胞\n",
    "    \"HSC\": [['CD3E', '-'], ['CD19', '-'], ['NCAM1', '-'], ['CD14', '-'], ['CEACAM8', '-'], [\"CD34\",\"+\"], [\"CD38\", \"-\"], [\"PTPRC\", \"-\"], [\"THY1\", \"+\"]],   # +，++，+++，-，lo，mid，hi, lo/mid\n",
    "    \"SPINK2_HSPC\": [['CD3E', '-'], ['CD19', '-'], ['NCAM1', '-'], ['CD14', '-'], ['CEACAM8', '-'], [\"SPINK2\", \"+\"], [\"CD34\", \"+\"]],\n",
    "    \"HSPC\": [['CD3E', '-'], ['CD19', '-'], ['NCAM1', '-'], ['CD14', '-'], ['CEACAM8', '-'], [\"SPINK2\", \"-\"], [\"CD34\",\"+\"]],\n",
    "    \"GMP\": [['CD3E', '-'], ['CD19', '-'], ['NCAM1', '-'], ['CD14', '-'], ['CEACAM8', '-'], [\"CD34\", \"+\"], [\"CD38\", \"+\"], [\"THY1\", \"+\"], [\"PTPRC\", \"-\"]],\n",
    "    \"GMP_Myeloblast\": [['CD3E', '-'], ['CD19', '-'], ['NCAM1', '-'], ['CD14', '-'], ['CEACAM8', '-'], [\"CD34\", \"+\"], [\"CD33\", \"+\"]],\n",
    "    \"Early_Myeloid_Progenitor\": [[\"CD33\", \"+\"], [\"MPO\", \"+\"], [\"ITGAM\", \"lo\"]],\n",
    "    \"Intermediate_Myeloid\": [[\"CD33\", \"mid\"], [\"MPO\", \"mid\"], [\"ITGAM\", \"+\"]],\n",
    "    \"Mature_Myeloid\": [[\"CD33\", \"lo\"], [\"MPO\", \"lo\"], [\"ITGAM\", \"+\"], [\"THBD\", \"+\"]],\n",
    "    \n",
    "    # 髓系细胞\n",
    "    \"Monocytes\": [[\"CD14\", \"+\"]],\n",
    "    \"Non_Classical_Monocyte\": [[\"CD14\", \"lo\"], [\"ITGAX\", \"+\"], [\"HLA-DRA\", \"++\"]],\n",
    "    \"Macrophages\": [[\"VCAM1\", \"+\"], [\"CD68\", \"+\"], [\"CD163\", \"+\"]],\n",
    "    \"pDC\": [[\"IL3RA\", \"+\"], [\"CD34\", \"-\"]],\n",
    "    \n",
    "    # 淋系细胞\n",
    "    \"CLP\": [['CD3E', '-'], ['CD19', '-'], ['NCAM1', '-'], ['CD14', '-'], ['CEACAM8', '-'], [\"CD34\", \"+\"], [\"CD38\", \"-\"], [\"PTPRC\", \"+\"]],\n",
    "    \"Immature_B_Cell\": [[\"PAX5\", \"+\"], [\"CD79A\", \"+\"], [\"CD38\", \"lo/mid\"]],\n",
    "    \"B_Cells\": [[\"PAX5\", \"-\"], [\"CD79A\", \"+\"], [\"CD38\", \"lo/mid\"]],\n",
    "    \"CD4_T_Cell\": [[\"CD3E\", \"+\"], [\"CD4\", \"+\"]],\n",
    "    \"CD8_T_Cell\": [[\"CD3E\", \"+\"], [\"CD8A\", \"+\"]],\n",
    "    \"Plasma_Cells\": [[\"CD79A\", \"+\"], [\"CD38\", \"+++\"], [\"SDC1\", \"+\"]],\n",
    "    \n",
    "    # 红系/巨核系\n",
    "    \"MEP_Early_Erythroblast\": [['CD3E', '-'], ['CD19', '-'], ['NCAM1', '-'], ['CD14', '-'], ['CEACAM8', '-'], [\"CD34\", \"+\"], [\"GATA1\", \"+\"]],\n",
    "    \"CD34_CD61\": [['CD3E', '-'], ['CD19', '-'], ['NCAM1', '-'], ['CD14', '-'], ['CEACAM8', '-'], [\"CD34\", \"+\"], [\"ITGB3\", \"+\"]],\n",
    "    \"Erythroblast\": [[\"GATA1\", \"+\"], [\"TFRC\", \"+\"], [\"GYPC\", \"+\"]],\n",
    "    \"Erythroid\": [[\"GYPC\", \"+\"], [\"TFRC\", \"+\"]],\n",
    "    \"GATA1neg_Mks\": [[\"GATA1\", \"-\"], [\"ITGB3\", \"+\"], [\"TGFB1\", \"+\"]],\n",
    "    \"GATA1pos_Mks\": [[\"GATA1\", \"+\"], [\"ITGB3\", \"+\"], [\"TGFB1\", \"+\"]],\n",
    "    \n",
    "    # 间充质/基质细胞\n",
    "    \"Adipo_MSC\": [[\"FOXC1\", \"+\"], [\"CXCL12\", \"+\"], [\"THY1\",\"lo\"]],\n",
    "    \"THY1_MSC\": [[\"FOXC1\", \"+\"], [\"CXCL12\", \"+\"], [\"THY1\", \"hi\"]],\n",
    "    \"Adipocyte\": [[\"MCAM\", \"+\"], [\"SDC1\", \"+\"]],\n",
    "    \"Endosteal\": [[\"NCAM1\", \"+\"], [\"VIM\", \"+\"], [\"cluster_spatial_location\", \".\"]],\n",
    "    \"AEC\": [[\"CXCL12\", \"+\"], [\"CDH5\", \"+\"]],\n",
    "    \"SEC\": [[\"CDH5\", \"+\"], [\"CD34\", \"+\"], [\"CXCL12\", \"-\"]],\n",
    "    \"VSMC\": [[\"ACTA2\", \"+\"], [\"CDH5\", \"-\"]],\n",
    "    \"Schwann_Cell\": [[\"PLP1\", \"+\"], [\"NGFR\", \"+\"]]\n",
    "}\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "83f5de4f-a3da-4ca3-b443-c5b5c02b90e5",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "38"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "unique_genes = list({gene[0] for markers in markers_dict.values() for gene in markers if not gene[0].startswith('cluster') }) \n",
    "\n",
    "len(unique_genes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "2e6fd303-6d4d-479e-b3ee-60c79587ef3c",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['CDH5', 'CD163', 'GYPC', 'IL3RA', 'CD14', 'CD19', 'HLA-DRA', 'CD34', 'NCAM1', 'TGFB1', 'NGFR', 'CD38', 'CD4', 'SDC1', 'THBD', 'THY1', 'PLP1', 'PTPRC', 'CD33', 'SPINK2', 'PAX5', 'CD8A', 'TFRC', 'CD3E', 'ITGAX', 'FOXC1', 'CXCL12', 'CD68', 'ITGAM', 'VCAM1', 'ITGB3', 'VIM', 'GATA1', 'MPO', 'MCAM', 'CD79A', 'CEACAM8', 'ACTA2']\n"
     ]
    }
   ],
   "source": [
    "print(unique_genes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "id": "ccb28646-443d-4a46-b499-6bf0ca5965e2",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "linList = ['CD3E', 'CD19', 'NCAM1', 'CD14', 'CEACAM8']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "id": "239ddab1-492f-4d21-a9b5-d453885b90aa",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[]"
      ]
     },
     "execution_count": 104,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "redup = [x for x in linList if x not in df[\"gene\"].tolist()]\n",
    "print(len(redup))\n",
    "redup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "id": "59a73377-2889-4c1a-89c4-00c1e49a750e",
   "metadata": {
    "collapsed": true,
    "jupyter": {
     "outputs_hidden": true
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['MZB1',\n",
       " 'SEC11C',\n",
       " 'DERL3',\n",
       " 'FKBP11',\n",
       " 'SSR4',\n",
       " 'SLAMF7',\n",
       " 'TNFRSF17',\n",
       " 'FCRL5',\n",
       " 'SPAG4',\n",
       " 'HERPUD1',\n",
       " 'IFNG-AS1',\n",
       " 'SPCS2',\n",
       " 'TPD52',\n",
       " 'CD27',\n",
       " 'TNFRSF13B',\n",
       " 'SPCS3',\n",
       " 'SDF2L1',\n",
       " 'PRDX4',\n",
       " 'CYBA',\n",
       " 'FKBP2',\n",
       " 'PLPP5',\n",
       " 'POU2AF1',\n",
       " 'VOPP1',\n",
       " 'SPCS1',\n",
       " 'TENT5C',\n",
       " 'EIF2AK3',\n",
       " 'DUSP5',\n",
       " 'ZEB2',\n",
       " 'PAIP2B',\n",
       " 'SDC1',\n",
       " 'ERLEC1',\n",
       " 'PELI1',\n",
       " 'MEI1',\n",
       " 'SSR3',\n",
       " 'RAB30',\n",
       " 'COBLL1',\n",
       " 'CYTOR',\n",
       " 'TXNDC5',\n",
       " 'MYDGF',\n",
       " 'TMEM156',\n",
       " 'LINC00513',\n",
       " 'DEK',\n",
       " 'CRELD2',\n",
       " 'JSRP1',\n",
       " 'DNAJB9',\n",
       " 'ISG20',\n",
       " 'GNAQ',\n",
       " 'FAM49A',\n",
       " 'PRDM1',\n",
       " 'SCNN1B',\n",
       " 'RAPGEF1',\n",
       " 'PIM2',\n",
       " 'KRTCAP2',\n",
       " 'MANF',\n",
       " 'RHOH',\n",
       " 'ANKRD28',\n",
       " 'FAAH2',\n",
       " 'CD38',\n",
       " 'ITM2C',\n",
       " 'PDK1',\n",
       " 'ST6GAL1',\n",
       " 'HSP90B1',\n",
       " 'XBP1',\n",
       " 'RASGRP3',\n",
       " 'HLA-DOB',\n",
       " 'TRIB1',\n",
       " 'TBC1D9',\n",
       " 'SELENOS',\n",
       " 'GNG7',\n",
       " 'CARMIL1',\n",
       " 'TMED9',\n",
       " 'CLPTM1L',\n",
       " 'SEL1L3',\n",
       " 'AC012368.1',\n",
       " 'AUTS2',\n",
       " 'TNFRSF18',\n",
       " 'ACTB',\n",
       " 'BLNK',\n",
       " 'LMAN2',\n",
       " 'IGLV3-1',\n",
       " 'SEL1L',\n",
       " 'SLAMF1',\n",
       " 'TMEM238',\n",
       " 'TMSB4X',\n",
       " 'LRMDA',\n",
       " 'CREB3L2',\n",
       " 'BIRC3',\n",
       " 'H3F3A',\n",
       " 'LY96',\n",
       " 'TMEM59',\n",
       " 'CPNE5',\n",
       " 'TMEM258',\n",
       " 'PNOC',\n",
       " 'BMP6',\n",
       " 'FOXP1',\n",
       " 'TRPS1',\n",
       " 'ZNF215',\n",
       " 'AC016831.7',\n",
       " 'PDIA6',\n",
       " 'FBP1',\n",
       " 'ADA2',\n",
       " 'NUGGC',\n",
       " 'ERN1',\n",
       " 'FAM92B',\n",
       " 'AC078883.1',\n",
       " 'CYTIP',\n",
       " 'HMGB1',\n",
       " 'ACTN1',\n",
       " 'CHPF',\n",
       " 'AQP3',\n",
       " 'AC058791.1',\n",
       " 'ANXA1',\n",
       " 'ZBP1',\n",
       " 'IRF4',\n",
       " 'TRPV3',\n",
       " 'IGFBP7',\n",
       " 'FTH1',\n",
       " 'LY9',\n",
       " 'CCDC88C',\n",
       " 'UBC',\n",
       " 'TGFBR2',\n",
       " 'PTPN1',\n",
       " 'SEC61B',\n",
       " 'IKZF3',\n",
       " 'KLHL6',\n",
       " 'SLC2A3',\n",
       " 'SELENOK',\n",
       " 'LMAN1',\n",
       " 'OTUD1',\n",
       " 'ICAM3',\n",
       " 'AC008014.1',\n",
       " 'PLXDC2',\n",
       " 'TTLL7',\n",
       " 'IL2RG',\n",
       " 'DDOST',\n",
       " 'TP53INP1',\n",
       " 'CTBP2',\n",
       " 'PTPN12',\n",
       " 'IFITM3',\n",
       " 'TFPI',\n",
       " 'EZR',\n",
       " 'CTHRC1',\n",
       " 'BFSP2',\n",
       " 'RFTN1',\n",
       " 'PARM1',\n",
       " 'EAF2',\n",
       " 'REL',\n",
       " 'IRAK3',\n",
       " 'MSRB3',\n",
       " 'FNDC3A',\n",
       " 'AC007569.1',\n",
       " 'PLCB1',\n",
       " 'SRPRA',\n",
       " 'COL4A4',\n",
       " 'LYPD6B',\n",
       " 'SRPRB',\n",
       " 'ANKRD37',\n",
       " 'AMPD1',\n",
       " 'AC092546.1',\n",
       " 'IGF2BP2',\n",
       " 'HIF1A',\n",
       " 'AC012236.1',\n",
       " 'AC007952.4',\n",
       " 'MYO1D',\n",
       " 'STAP1',\n",
       " 'HLA-C',\n",
       " 'TMEM205',\n",
       " 'TXNDC15',\n",
       " 'RALGPS2',\n",
       " 'AC016831.5',\n",
       " 'PTMA',\n",
       " 'DAPP1',\n",
       " 'HM13',\n",
       " 'TXNDC11',\n",
       " 'SOX4',\n",
       " 'A1BG',\n",
       " 'RABAC1',\n",
       " 'GALNT2',\n",
       " 'KLF6',\n",
       " 'BHLHE41',\n",
       " 'EVI2B',\n",
       " 'IGF1',\n",
       " 'HSD17B8',\n",
       " 'MBNL2',\n",
       " 'GMPPB',\n",
       " 'CDC42BPA',\n",
       " 'PLSCR1',\n",
       " 'EIF2AK4',\n",
       " 'SERPINB9',\n",
       " 'AC016831.1',\n",
       " 'TIAM1',\n",
       " 'CADPS2',\n",
       " 'SVIL',\n",
       " 'EBF1',\n",
       " 'CD79A',\n",
       " 'BCL2',\n",
       " 'GSN',\n",
       " 'NPC2',\n",
       " 'TCF7L2',\n",
       " 'TMEM208',\n",
       " 'C16orf74',\n",
       " 'TXLNB',\n",
       " 'DERL1',\n",
       " 'RASSF6',\n",
       " 'C11orf80',\n",
       " 'HIPK2',\n",
       " 'FYN',\n",
       " 'PREX1',\n",
       " 'HDLBP',\n",
       " 'CUTA',\n",
       " 'TUBA1A',\n",
       " 'LINC00582',\n",
       " 'SAMD12',\n",
       " 'CD99',\n",
       " 'HMCES',\n",
       " 'CLDN14',\n",
       " 'APP',\n",
       " 'IGKC',\n",
       " 'DNAAF1',\n",
       " 'RAB11FIP1',\n",
       " 'DOCK1',\n",
       " 'PDIA4',\n",
       " 'IGLV10-54',\n",
       " 'ETS2',\n",
       " 'DENND5A',\n",
       " 'ADTRP',\n",
       " 'GPR176',\n",
       " 'TMBIM6',\n",
       " 'LAX1',\n",
       " 'AC007384.1',\n",
       " 'TSC22D1',\n",
       " 'CNTLN',\n",
       " 'B2M',\n",
       " 'PTPRM',\n",
       " 'OSBPL3',\n",
       " 'ARSA',\n",
       " 'PDE4B',\n",
       " 'HNRNPA2B1',\n",
       " 'PERP',\n",
       " 'PTPRC',\n",
       " 'SLC41A2',\n",
       " 'PTEN',\n",
       " 'DENND2C',\n",
       " 'CASP10',\n",
       " 'INSR',\n",
       " 'AGA',\n",
       " 'ELL2',\n",
       " 'IGHG3',\n",
       " 'CEBPD',\n",
       " 'PPP1R12A',\n",
       " 'QPCT',\n",
       " 'DOCK5',\n",
       " 'CD48',\n",
       " 'SESN3',\n",
       " 'BCL2L11',\n",
       " 'DPYSL2',\n",
       " 'HSPA13',\n",
       " 'HLA-DRB1',\n",
       " 'SPINT2',\n",
       " 'CFLAR',\n",
       " 'MIR155HG',\n",
       " 'IGHG4',\n",
       " 'IGHG1',\n",
       " 'AC021678.2',\n",
       " 'KCNN3',\n",
       " 'IFITM2',\n",
       " 'POU2F2',\n",
       " 'FCGR2B',\n",
       " 'TACC1',\n",
       " 'AL121944.1',\n",
       " 'FICD',\n",
       " 'TRAM1',\n",
       " 'PTK2',\n",
       " 'ST6GALNAC4',\n",
       " 'NCOA3',\n",
       " 'TEX14',\n",
       " 'DCC',\n",
       " 'CCND2',\n",
       " 'ETS1',\n",
       " 'HDGFL3',\n",
       " 'KDELR1',\n",
       " 'UFM1',\n",
       " 'CTNNB1',\n",
       " 'COTL1',\n",
       " 'RHBDD1',\n",
       " 'UBE2J1',\n",
       " 'IGLL5',\n",
       " 'HLA-DPB1',\n",
       " 'IL5RA',\n",
       " 'SMAP2',\n",
       " 'BCAR3',\n",
       " 'IGHG2',\n",
       " 'LHFPL6',\n",
       " 'TOR3A',\n",
       " 'TMEM19',\n",
       " 'LMF1',\n",
       " 'CERS6',\n",
       " 'ITGA6',\n",
       " 'MT-CO3',\n",
       " 'ICAM2',\n",
       " 'MT-ND1',\n",
       " 'TNFRSF13C',\n",
       " 'AC022182.1',\n",
       " 'CTSS',\n",
       " 'IL10RA',\n",
       " 'TNIP1',\n",
       " 'HYOU1',\n",
       " 'FUS',\n",
       " 'LMO2',\n",
       " 'ZFHX3',\n",
       " 'MAP4K4',\n",
       " 'QPRT',\n",
       " 'AC087280.2',\n",
       " 'SP140',\n",
       " 'TNFRSF1A',\n",
       " 'GAB1',\n",
       " 'SELENOM',\n",
       " 'SLC25A4',\n",
       " 'TNFRSF4',\n",
       " 'KDELR2',\n",
       " 'AP002852.1',\n",
       " 'CAVIN1',\n",
       " 'GRB10',\n",
       " 'RUNX1',\n",
       " 'BRSK1',\n",
       " 'ADAM19',\n",
       " 'NUCB2',\n",
       " 'RBMS3',\n",
       " 'TMED4',\n",
       " 'WDFY3',\n",
       " 'FBN1',\n",
       " 'UBALD2',\n",
       " 'ZFAS1',\n",
       " 'MANEA',\n",
       " 'SPATS2',\n",
       " 'SLC1A4',\n",
       " 'TMC3-AS1',\n",
       " 'SH3RF3',\n",
       " 'ARFGEF3',\n",
       " 'TMED10',\n",
       " 'RIPOR2',\n",
       " 'SEC24A',\n",
       " 'TSTD1',\n",
       " 'FRMD4B',\n",
       " 'FBXW7',\n",
       " 'SYVN1',\n",
       " 'CLEC11A',\n",
       " 'WNT10A',\n",
       " 'TRAF3',\n",
       " 'PGM3',\n",
       " 'ABCB9',\n",
       " 'SLC7A1',\n",
       " 'CFH',\n",
       " 'SERPING1',\n",
       " 'HSH2D',\n",
       " 'TAGAP',\n",
       " 'EIF4G3',\n",
       " 'UBAC2',\n",
       " 'ABCA1',\n",
       " 'FGFR1',\n",
       " 'VCL',\n",
       " 'C1orf21',\n",
       " 'ITGA5',\n",
       " 'HLA-DRA',\n",
       " 'SLC7A7',\n",
       " 'MOXD1',\n",
       " 'IGKV4-1',\n",
       " 'ACTG1',\n",
       " 'NNMT',\n",
       " 'DM1-AS',\n",
       " 'MT-ND4',\n",
       " 'MERTK',\n",
       " 'TIMP3',\n",
       " 'LSR',\n",
       " 'CDK2AP2',\n",
       " 'IL1R1',\n",
       " 'HNRNPU',\n",
       " 'YBX3',\n",
       " 'GRIK4',\n",
       " 'CDKN1A',\n",
       " 'MT-CO1',\n",
       " 'HNRNPK',\n",
       " 'PSD3',\n",
       " 'MAPK1',\n",
       " 'GPR160',\n",
       " 'PDXK',\n",
       " 'TRIM22',\n",
       " 'PKHD1L1',\n",
       " 'SPARC',\n",
       " 'CPEB4',\n",
       " 'CCSER1',\n",
       " 'EMP2',\n",
       " 'CDK19',\n",
       " 'KANK1',\n",
       " 'STRBP',\n",
       " 'PLEKHA5',\n",
       " 'REEP5',\n",
       " 'PABPC1',\n",
       " 'CALD1',\n",
       " 'SH3RF1',\n",
       " 'FCRL2',\n",
       " 'CCR10',\n",
       " 'PRKCH',\n",
       " 'RHEX',\n",
       " 'CYBRD1',\n",
       " 'SLC35F2',\n",
       " 'GNB4',\n",
       " 'MYO1E',\n",
       " 'BEX5',\n",
       " 'AC004594.1',\n",
       " 'LPCAT2',\n",
       " 'TAPBPL',\n",
       " 'DNAJC1',\n",
       " 'RUBCN',\n",
       " 'ZNF165',\n",
       " 'B4GALT7',\n",
       " 'FOXC1',\n",
       " 'JCHAIN',\n",
       " 'ITGA8',\n",
       " 'CFL2',\n",
       " 'PIM1',\n",
       " 'SEC24D',\n",
       " 'NRP1',\n",
       " 'IGLV6-57',\n",
       " 'AC104024.1',\n",
       " 'FBXL7',\n",
       " 'EDEM2',\n",
       " 'PICALM',\n",
       " 'RPS24',\n",
       " 'CHCHD10',\n",
       " 'MAML3',\n",
       " 'ZFAND3',\n",
       " 'AC084200.1',\n",
       " 'NUCKS1',\n",
       " 'NFIB',\n",
       " 'PLSCR4',\n",
       " 'MYO5B',\n",
       " 'TRADD',\n",
       " 'CEP170',\n",
       " 'NFIX',\n",
       " 'TPM1',\n",
       " 'LPGAT1',\n",
       " 'LPP',\n",
       " 'TMEM107',\n",
       " 'CCNI',\n",
       " 'SASH1',\n",
       " 'PFN1',\n",
       " 'MCTP1',\n",
       " 'CPNE8',\n",
       " 'NAALADL2',\n",
       " 'LPIN1',\n",
       " 'AIM2',\n",
       " 'SESTD1',\n",
       " 'BACH1',\n",
       " 'TFDP2',\n",
       " 'ERG',\n",
       " 'RPN1',\n",
       " 'KIF21A',\n",
       " 'UNC93B1',\n",
       " 'SIK1',\n",
       " 'IGF1R',\n",
       " 'PBX3',\n",
       " 'TNFRSF1B',\n",
       " 'AC017002.5',\n",
       " 'GAPDH',\n",
       " 'CROCC',\n",
       " 'C2orf88',\n",
       " 'BICC1',\n",
       " 'GHR',\n",
       " 'MB21D2',\n",
       " 'SWAP70',\n",
       " 'DENND1B',\n",
       " 'HID1',\n",
       " 'PCM1',\n",
       " 'NRG3',\n",
       " 'TTC39C',\n",
       " 'KIF13A',\n",
       " 'EMP1',\n",
       " 'SPRY1',\n",
       " 'CFL1',\n",
       " 'SEPT7',\n",
       " 'ZBTB38',\n",
       " 'SRM',\n",
       " 'CDH11',\n",
       " 'MYL6',\n",
       " 'SPARCL1',\n",
       " 'ZNF521',\n",
       " 'DUSP22',\n",
       " 'LAMA4',\n",
       " 'AC008569.1',\n",
       " 'PTPN13',\n",
       " 'CTDSPL',\n",
       " 'CXCR4',\n",
       " 'STAT5B',\n",
       " 'PLP2',\n",
       " 'PPFIBP1',\n",
       " 'MAPK13',\n",
       " 'PDE7B',\n",
       " 'ERP29',\n",
       " 'CKAP4',\n",
       " 'TMSB10',\n",
       " 'ALDH1L2',\n",
       " 'EML6',\n",
       " 'ALCAM',\n",
       " 'RNASET2',\n",
       " 'JAM3',\n",
       " 'SERP1',\n",
       " 'PPIA',\n",
       " 'COL6A2',\n",
       " 'SEC61A1',\n",
       " 'PLS3',\n",
       " 'DGKI',\n",
       " 'RALGAPA2',\n",
       " 'PDIA5',\n",
       " 'CD109',\n",
       " 'MORF4L1',\n",
       " 'ENG',\n",
       " 'PTBP2',\n",
       " 'FSTL1',\n",
       " 'CLECL1',\n",
       " 'ADAMTS9-AS2',\n",
       " 'VAV3',\n",
       " 'ROCK1',\n",
       " 'AC104134.1',\n",
       " 'CLU',\n",
       " 'TNRC18',\n",
       " 'STXBP5',\n",
       " 'DENND5B',\n",
       " 'SRSF3',\n",
       " 'XRCC5',\n",
       " 'LARP1B',\n",
       " 'USP32',\n",
       " 'LRRFIP1',\n",
       " 'COMTD1',\n",
       " 'SLC44A2',\n",
       " 'GNAI1',\n",
       " 'NEDD4',\n",
       " 'PIK3R1',\n",
       " 'DPEP1',\n",
       " 'AKT3',\n",
       " 'SLC25A37',\n",
       " 'IRS2',\n",
       " 'LINC02384',\n",
       " 'ARID4B',\n",
       " 'CCPG1',\n",
       " 'CHID1',\n",
       " 'PHLPP2',\n",
       " 'RETREG1',\n",
       " 'AIF1',\n",
       " 'BTBD11',\n",
       " 'NAGLU',\n",
       " 'ACOXL',\n",
       " 'TPM2',\n",
       " 'NFKBIA',\n",
       " 'FN1',\n",
       " 'SPRED1',\n",
       " 'PLXND1',\n",
       " 'MYL12B',\n",
       " 'TRAM2',\n",
       " 'RPL22',\n",
       " 'LINC01484',\n",
       " 'SEC61G',\n",
       " 'LCP2',\n",
       " 'KDELR3',\n",
       " 'CTSZ',\n",
       " 'NREP',\n",
       " 'BIK',\n",
       " 'ZNF638',\n",
       " 'LIFR',\n",
       " 'FBXO16',\n",
       " 'BUD23',\n",
       " 'LRPAP1',\n",
       " 'ESD',\n",
       " 'TGFBR3L',\n",
       " 'IRAK2',\n",
       " 'SLC22A17',\n",
       " 'KIRREL1',\n",
       " 'AFF3',\n",
       " 'MIR29B2CHG',\n",
       " 'THEMIS2',\n",
       " 'FTX',\n",
       " 'LUZP1',\n",
       " 'UBE2D3',\n",
       " 'PTPRE',\n",
       " 'PIP5K1B',\n",
       " 'IGHGP',\n",
       " 'RIN3',\n",
       " 'JMJD1C',\n",
       " 'LIMCH1',\n",
       " 'STK38',\n",
       " 'ZBTB16',\n",
       " 'ID3',\n",
       " 'ITPR1',\n",
       " 'YIPF2',\n",
       " 'JMY',\n",
       " 'RPN2',\n",
       " 'C16orf54',\n",
       " 'HAX1',\n",
       " 'FRY',\n",
       " 'PMP22',\n",
       " 'TMEM56',\n",
       " 'ARRDC2',\n",
       " 'CD79B',\n",
       " 'ALG5',\n",
       " 'SLC39A7',\n",
       " 'BTG2',\n",
       " 'ORMDL3',\n",
       " 'LINC02362',\n",
       " 'CAT',\n",
       " 'TYMP',\n",
       " 'IGFBP5',\n",
       " 'CBLB',\n",
       " 'RASGRP2',\n",
       " 'HNRNPA3',\n",
       " 'DDX5',\n",
       " 'IFNAR2',\n",
       " 'AKR1C3',\n",
       " 'MSN',\n",
       " 'SMDT1',\n",
       " 'MBNL1',\n",
       " 'EGFL7',\n",
       " 'TBCEL',\n",
       " 'LAPTM5',\n",
       " 'WIPI1',\n",
       " 'PPIB',\n",
       " 'PCDH9',\n",
       " 'DESI1',\n",
       " 'RBPMS',\n",
       " 'RPL7',\n",
       " 'FNDC3B',\n",
       " 'BTF3',\n",
       " 'DCN',\n",
       " 'SVIP',\n",
       " 'NUDT22',\n",
       " 'NIPBL',\n",
       " 'HSPA5',\n",
       " 'PTBP3',\n",
       " 'ZNF254',\n",
       " 'SORL1',\n",
       " 'EHD4',\n",
       " 'MXRA8',\n",
       " 'COL9A3',\n",
       " 'AC012645.3',\n",
       " 'SLC38A10',\n",
       " 'TBC1D30',\n",
       " 'EML5',\n",
       " 'PDGFRA',\n",
       " 'RNF217',\n",
       " 'DPP7',\n",
       " 'TGFBR3',\n",
       " 'CNTN5',\n",
       " 'CYP1B1',\n",
       " 'ZNF704',\n",
       " 'FAM3C',\n",
       " 'NECTIN2',\n",
       " 'DNAJC3',\n",
       " 'OSBPL8',\n",
       " 'YAP1',\n",
       " 'SFPQ',\n",
       " 'RORA-AS1',\n",
       " 'SMPDL3B',\n",
       " 'ADARB1',\n",
       " 'PECAM1',\n",
       " 'GRIP1',\n",
       " 'AL391056.1',\n",
       " 'SERPINF1',\n",
       " 'AL365361.1',\n",
       " 'CYTH3',\n",
       " 'PDGFRB',\n",
       " 'LYST',\n",
       " 'CMTM3',\n",
       " 'SEPT11',\n",
       " 'SLC35B1',\n",
       " 'BEX3',\n",
       " 'NRIP1',\n",
       " 'CASC15',\n",
       " 'LARGE1',\n",
       " 'MACF1',\n",
       " 'PARD3B',\n",
       " 'VGLL4',\n",
       " 'BGN',\n",
       " 'DIAPH2',\n",
       " 'RFX2',\n",
       " 'ZNF503',\n",
       " 'ADGRL2',\n",
       " 'EBF3',\n",
       " 'PLD1',\n",
       " 'MIR4435-2HG',\n",
       " 'PARVA',\n",
       " 'CREB3',\n",
       " 'AC060809.1',\n",
       " 'ESR1',\n",
       " 'GLI3',\n",
       " 'MAN1A1',\n",
       " 'FLRT2',\n",
       " 'CFAP54',\n",
       " 'ME3',\n",
       " 'CMIP',\n",
       " 'SRSF5',\n",
       " 'SPAG1',\n",
       " 'PHC2',\n",
       " 'RIN2',\n",
       " 'GTDC1',\n",
       " 'MT-ND2',\n",
       " 'CD81',\n",
       " 'GJA1',\n",
       " 'CLIC4',\n",
       " 'PLOD2',\n",
       " 'PCBP2',\n",
       " 'SRP54',\n",
       " 'MTUS1',\n",
       " 'ATF5',\n",
       " 'LDLRAD3',\n",
       " 'TMEM147',\n",
       " 'ZNF43',\n",
       " 'DDX17',\n",
       " 'A2M',\n",
       " 'C11orf24',\n",
       " 'SUSD1',\n",
       " 'CHST15',\n",
       " 'CFD',\n",
       " 'NIN',\n",
       " 'ITGB2',\n",
       " 'ANGPT1',\n",
       " 'LGALSL',\n",
       " 'OSMR',\n",
       " 'NUPR1',\n",
       " 'Z93241.1',\n",
       " 'TNS1',\n",
       " 'PCOLCE',\n",
       " 'SOX5',\n",
       " 'LRRC59',\n",
       " 'WNT5B',\n",
       " 'CITED2',\n",
       " 'RPL23',\n",
       " 'CSF1',\n",
       " 'DACH1',\n",
       " 'PKD2',\n",
       " 'RBMS2',\n",
       " 'ZBTB8OS',\n",
       " 'RGS1',\n",
       " 'CDC42',\n",
       " 'SMIM3',\n",
       " 'C7orf50',\n",
       " 'COL18A1',\n",
       " 'WDFY2',\n",
       " 'VPS13B',\n",
       " 'RAC1',\n",
       " 'LINC02541',\n",
       " 'ACSL1',\n",
       " 'CCDC80',\n",
       " 'PALLD',\n",
       " 'STK39',\n",
       " 'ZNF618',\n",
       " 'MT2A',\n",
       " 'KITLG',\n",
       " 'UNC5C',\n",
       " 'SDCBP',\n",
       " 'DPYSL3',\n",
       " 'COL4A3',\n",
       " 'PIK3CG',\n",
       " 'HNMT',\n",
       " 'GLUL',\n",
       " 'ERGIC3',\n",
       " 'ST6GALNAC3',\n",
       " 'TNFAIP6',\n",
       " 'MAPK10',\n",
       " 'PBX1',\n",
       " 'GAS7',\n",
       " 'ATRAID',\n",
       " 'SCAMP5',\n",
       " 'MRC2',\n",
       " 'CACNA2D1',\n",
       " 'NID1',\n",
       " 'PMM2',\n",
       " 'IGFBP4',\n",
       " 'UBE2QL1',\n",
       " 'ARID3B',\n",
       " 'CTNND1',\n",
       " 'ELF2',\n",
       " 'PPARG',\n",
       " 'MT-ATP6',\n",
       " 'ISCU',\n",
       " 'LUM',\n",
       " 'AP002518.2',\n",
       " 'MYO1F',\n",
       " 'SLC17A9',\n",
       " 'PPP1R13B',\n",
       " 'AC026369.3',\n",
       " 'MPDZ',\n",
       " 'RPS13',\n",
       " 'SLC38A5',\n",
       " 'PTPN14',\n",
       " 'HSPG2',\n",
       " 'NR2F2',\n",
       " 'ARFGAP3',\n",
       " 'MAGI2',\n",
       " 'STOM',\n",
       " 'GNA15',\n",
       " 'ZFHX4',\n",
       " 'RPS3A',\n",
       " 'CALM2',\n",
       " 'NAP1L1',\n",
       " 'PREX2',\n",
       " 'PFKP',\n",
       " 'FNBP1',\n",
       " 'TLN1',\n",
       " 'MIER1',\n",
       " 'MAGI1',\n",
       " 'PREB',\n",
       " 'LST1',\n",
       " 'BCL7A',\n",
       " 'CHD1',\n",
       " 'ATAD2B',\n",
       " 'MYH9',\n",
       " 'NCF4',\n",
       " 'SLC50A1',\n",
       " 'EEF1A1',\n",
       " 'UST',\n",
       " 'ATAD3C',\n",
       " 'MYO10',\n",
       " 'PXK',\n",
       " 'IFNLR1',\n",
       " 'SAR1B',\n",
       " 'MBOAT2',\n",
       " 'RUNX2',\n",
       " 'CRIM1',\n",
       " 'ANTXR1',\n",
       " 'HOOK1',\n",
       " 'H2AFY',\n",
       " 'N4BP2L2',\n",
       " 'DPYD',\n",
       " 'COL4A2',\n",
       " 'SPINT1-AS1',\n",
       " 'UACA',\n",
       " 'DHRS3',\n",
       " 'SUMO2',\n",
       " 'COL5A1',\n",
       " 'PDGFC',\n",
       " 'RUNX1T1',\n",
       " 'LINC01781',\n",
       " 'SBF2',\n",
       " 'DLEU2',\n",
       " 'DMXL2',\n",
       " 'IGHA2',\n",
       " 'TUBB',\n",
       " 'SYNGR2',\n",
       " 'RPL39',\n",
       " 'WLS',\n",
       " 'IER5L',\n",
       " 'PYGL',\n",
       " 'SATB2',\n",
       " 'AC044849.1',\n",
       " 'SUB1',\n",
       " 'RBM39',\n",
       " 'TTTY14',\n",
       " 'HNRNPA1',\n",
       " 'C1QTNF1',\n",
       " 'ZMYM2',\n",
       " 'SON',\n",
       " 'WASF3',\n",
       " 'RHPN2',\n",
       " 'RPL31',\n",
       " 'LINC00877',\n",
       " 'KALRN',\n",
       " 'ERGIC2',\n",
       " 'COL3A1',\n",
       " 'APLP2',\n",
       " 'CYR61',\n",
       " 'C1GALT1C1',\n",
       " 'ABCG1',\n",
       " 'APBB1IP',\n",
       " 'ABHD14A',\n",
       " 'ASAP2',\n",
       " 'COL4A1',\n",
       " 'RAI14',\n",
       " 'WAC',\n",
       " 'UBA52',\n",
       " 'SELL',\n",
       " 'CD82',\n",
       " 'ITGBL1',\n",
       " 'SPIDR',\n",
       " 'SCAF11',\n",
       " 'RAB32',\n",
       " 'MT-CO2',\n",
       " 'ZSWIM6',\n",
       " 'PSTPIP2',\n",
       " 'CLDN3',\n",
       " 'MGP',\n",
       " 'AC103591.3',\n",
       " 'TM7SF2',\n",
       " 'SLCO3A1',\n",
       " 'MTRNR2L12',\n",
       " 'NLGN1',\n",
       " 'PTGER4',\n",
       " 'TBC1D2B',\n",
       " 'SPATA6',\n",
       " 'MAF',\n",
       " 'SMARCA1',\n",
       " 'AC126696.1',\n",
       " 'COX7A1',\n",
       " 'DNAJC4',\n",
       " 'STMN1',\n",
       " 'C1S',\n",
       " 'ADIRF',\n",
       " 'ST14',\n",
       " 'RPL38',\n",
       " 'RIC3',\n",
       " 'THRB',\n",
       " 'ST3GAL6',\n",
       " 'ADGRE5',\n",
       " 'CSNK1A1',\n",
       " 'AC023590.1',\n",
       " 'ITGA1',\n",
       " 'SURF1',\n",
       " 'DUSP26',\n",
       " 'HIPK3',\n",
       " 'FAM172A',\n",
       " 'ABCA8',\n",
       " 'OLFML3',\n",
       " 'CRISPLD2',\n",
       " 'CHD3',\n",
       " 'AARS',\n",
       " 'ARID3A',\n",
       " 'CD36',\n",
       " 'GORASP2',\n",
       " 'NCF1',\n",
       " 'MYLIP',\n",
       " 'TIFA',\n",
       " 'AGPAT4',\n",
       " 'SLC7A5',\n",
       " 'ARMC2',\n",
       " 'MAP3K20',\n",
       " 'FYB1',\n",
       " 'ST5',\n",
       " 'ACAP2',\n",
       " 'TBXAS1',\n",
       " 'LINC00571',\n",
       " 'USP48',\n",
       " 'GLCCI1',\n",
       " 'GNG11',\n",
       " 'PRSS16',\n",
       " 'NPDC1',\n",
       " 'RNF130',\n",
       " 'MPZL3',\n",
       " 'GRAMD1B',\n",
       " 'SIK1B',\n",
       " 'SLIT3',\n",
       " 'BACE2',\n",
       " 'BMP5',\n",
       " 'HHEX',\n",
       " 'SEC14L1',\n",
       " 'NAXE',\n",
       " 'ITGA9',\n",
       " 'RPL28',\n",
       " 'ADAMTS1',\n",
       " 'HNRNPD',\n",
       " 'MGAT2',\n",
       " 'SLC44A1',\n",
       " 'PRSS23',\n",
       " 'INPP5D',\n",
       " 'ATP8B4',\n",
       " 'MAP3K1',\n",
       " 'NBL1',\n",
       " 'B9D1',\n",
       " 'CCDC167',\n",
       " 'COMMD3',\n",
       " 'MYLK',\n",
       " 'SNX25',\n",
       " 'TBC1D16',\n",
       " 'KCNT2',\n",
       " 'DENND4C',\n",
       " 'CEP128',\n",
       " 'REV3L',\n",
       " 'TACC3',\n",
       " 'QKI',\n",
       " 'CD34',\n",
       " 'RASAL2',\n",
       " 'MIAT',\n",
       " 'ZNF131',\n",
       " 'TAGLN',\n",
       " 'LINC01480',\n",
       " 'PABPC4',\n",
       " 'KCNA3',\n",
       " 'ATRX',\n",
       " 'VCAN',\n",
       " 'PID1',\n",
       " 'ARRB1',\n",
       " 'CNRIP1',\n",
       " 'MAFB',\n",
       " 'CFLAR-AS1',\n",
       " 'SRP14',\n",
       " 'TRA2B',\n",
       " 'ATXN1',\n",
       " 'MYL12A',\n",
       " 'LUC7L3',\n",
       " 'RARRES2',\n",
       " 'HTRA1',\n",
       " 'NAV3',\n",
       " 'HYI',\n",
       " ...]"
      ]
     },
     "execution_count": 76,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[\"gene\"].tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "id": "3d9d2522-23fe-4356-ac92-af4862bbaa2b",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "if \"1-Mar\" in df[\"gene\"].tolist():\n",
    "    print('ok')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "id": "efe44cb9-3e10-4daf-a929-fb50980b9800",
   "metadata": {
    "collapsed": true,
    "jupyter": {
     "outputs_hidden": true
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>cluster</th>\n",
       "      <th>size</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>11534</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>10835</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>12663</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>9804</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>9562</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>5</td>\n",
       "      <td>11792</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>6</td>\n",
       "      <td>16424</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>7</td>\n",
       "      <td>10799</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>8</td>\n",
       "      <td>10425</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>9</td>\n",
       "      <td>14220</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>10</td>\n",
       "      <td>11920</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>11</td>\n",
       "      <td>10973</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>12</td>\n",
       "      <td>12980</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>13</td>\n",
       "      <td>12496</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>14</td>\n",
       "      <td>9707</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>15</td>\n",
       "      <td>15446</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>16</td>\n",
       "      <td>16146</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>17</td>\n",
       "      <td>9363</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>18</td>\n",
       "      <td>12439</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>19</td>\n",
       "      <td>12388</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>20</td>\n",
       "      <td>8623</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>21</td>\n",
       "      <td>15549</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>22</td>\n",
       "      <td>10641</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>23</td>\n",
       "      <td>8992</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>24</td>\n",
       "      <td>12774</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>25</td>\n",
       "      <td>15200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>26</td>\n",
       "      <td>12476</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>27</td>\n",
       "      <td>13015</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>28</td>\n",
       "      <td>11689</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>29</td>\n",
       "      <td>16099</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>30</td>\n",
       "      <td>16667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>31</td>\n",
       "      <td>9071</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>32</td>\n",
       "      <td>11847</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>33</td>\n",
       "      <td>9698</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>34</td>\n",
       "      <td>9586</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>35</td>\n",
       "      <td>10454</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>36</td>\n",
       "      <td>10940</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>37</td>\n",
       "      <td>8474</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>38</td>\n",
       "      <td>11166</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>39</td>\n",
       "      <td>12255</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>40</td>\n",
       "      <td>4584</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41</th>\n",
       "      <td>41</td>\n",
       "      <td>5080</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42</th>\n",
       "      <td>42</td>\n",
       "      <td>4478</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43</th>\n",
       "      <td>43</td>\n",
       "      <td>8890</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>44</td>\n",
       "      <td>4470</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    cluster   size\n",
       "0         0  11534\n",
       "1         1  10835\n",
       "2         2  12663\n",
       "3         3   9804\n",
       "4         4   9562\n",
       "5         5  11792\n",
       "6         6  16424\n",
       "7         7  10799\n",
       "8         8  10425\n",
       "9         9  14220\n",
       "10       10  11920\n",
       "11       11  10973\n",
       "12       12  12980\n",
       "13       13  12496\n",
       "14       14   9707\n",
       "15       15  15446\n",
       "16       16  16146\n",
       "17       17   9363\n",
       "18       18  12439\n",
       "19       19  12388\n",
       "20       20   8623\n",
       "21       21  15549\n",
       "22       22  10641\n",
       "23       23   8992\n",
       "24       24  12774\n",
       "25       25  15200\n",
       "26       26  12476\n",
       "27       27  13015\n",
       "28       28  11689\n",
       "29       29  16099\n",
       "30       30  16667\n",
       "31       31   9071\n",
       "32       32  11847\n",
       "33       33   9698\n",
       "34       34   9586\n",
       "35       35  10454\n",
       "36       36  10940\n",
       "37       37   8474\n",
       "38       38  11166\n",
       "39       39  12255\n",
       "40       40   4584\n",
       "41       41   5080\n",
       "42       42   4478\n",
       "43       43   8890\n",
       "44       44   4470"
      ]
     },
     "execution_count": 82,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby('cluster', as_index=False).size()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "4afab5c0-6fa4-4be3-b205-7de40e719ea6",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "cluster_0= df[df['cluster'] == 0]\n",
    "cluster_1= df[df['cluster'] == 1]\n",
    "cluster_2= df[df['cluster'] == 2]\n",
    "cluster_3= df[df['cluster'] == 3]\n",
    "cluster_4= df[df['cluster'] == 4]\n",
    "cluster_5= df[df['cluster'] == 5]\n",
    "cluster_6= df[df['cluster'] == 6]\n",
    "cluster_7= df[df['cluster'] == 7]\n",
    "cluster_8= df[df['cluster'] == 8]\n",
    "cluster_9= df[df['cluster'] == 9]\n",
    "cluster_10= df[df['cluster'] == 10]\n",
    "cluster_11= df[df['cluster'] == 11]\n",
    "cluster_12= df[df['cluster'] == 12]\n",
    "cluster_13= df[df['cluster'] == 13]\n",
    "cluster_14= df[df['cluster'] == 14]\n",
    "cluster_15= df[df['cluster'] == 15]\n",
    "cluster_16= df[df['cluster'] == 16]\n",
    "cluster_17= df[df['cluster'] == 17]\n",
    "cluster_18= df[df['cluster'] == 18]\n",
    "cluster_19= df[df['cluster'] == 19]\n",
    "cluster_20= df[df['cluster'] == 20]\n",
    "cluster_21= df[df['cluster'] == 21]\n",
    "cluster_22= df[df['cluster'] == 22]\n",
    "cluster_23= df[df['cluster'] == 23]\n",
    "cluster_24= df[df['cluster'] == 24]\n",
    "cluster_25= df[df['cluster'] == 25]\n",
    "cluster_26= df[df['cluster'] == 26]\n",
    "cluster_27= df[df['cluster'] == 27]\n",
    "cluster_28= df[df['cluster'] == 28]\n",
    "cluster_29= df[df['cluster'] == 29]\n",
    "cluster_30= df[df['cluster'] == 30]\n",
    "cluster_31= df[df['cluster'] == 31]\n",
    "cluster_32= df[df['cluster'] == 32]\n",
    "cluster_33= df[df['cluster'] == 33]\n",
    "cluster_34= df[df['cluster'] == 34]\n",
    "cluster_35= df[df['cluster'] == 35]\n",
    "cluster_36= df[df['cluster'] == 36]\n",
    "cluster_37= df[df['cluster'] == 37]\n",
    "cluster_38= df[df['cluster'] == 38]\n",
    "cluster_39= df[df['cluster'] == 39]\n",
    "cluster_40= df[df['cluster'] == 40]\n",
    "cluster_41= df[df['cluster'] == 41]\n",
    "cluster_42= df[df['cluster'] == 42]\n",
    "cluster_43= df[df['cluster'] == 43]\n",
    "cluster_44= df[df['cluster'] == 44]\n",
    "\n",
    "\n",
    "df_list = [cluster_0, cluster_1, cluster_2, cluster_3, cluster_4, cluster_5, cluster_6, cluster_7, cluster_8, cluster_9, cluster_10, cluster_11, cluster_12, cluster_13,cluster_14,cluster_15,cluster_16,cluster_17,cluster_18,cluster_19,cluster_20,cluster_21,cluster_22,cluster_23,cluster_24,cluster_25,cluster_26,cluster_27,cluster_28,cluster_29,cluster_30,cluster_31,cluster_32,cluster_33,cluster_34,cluster_35,cluster_36,cluster_37,cluster_38,cluster_39,cluster_40,cluster_41,cluster_42,cluster_43,cluster_44]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 118,
   "id": "3d7cd1ed-9c15-4275-8d37-66a6e6eacf56",
   "metadata": {
    "collapsed": true,
    "jupyter": {
     "outputs_hidden": true
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "cluster_0= df[df['cluster'] == 0]\n",
      "cluster_1= df[df['cluster'] == 1]\n",
      "cluster_2= df[df['cluster'] == 2]\n",
      "cluster_3= df[df['cluster'] == 3]\n",
      "cluster_4= df[df['cluster'] == 4]\n",
      "cluster_5= df[df['cluster'] == 5]\n",
      "cluster_6= df[df['cluster'] == 6]\n",
      "cluster_7= df[df['cluster'] == 7]\n",
      "cluster_8= df[df['cluster'] == 8]\n",
      "cluster_9= df[df['cluster'] == 9]\n",
      "cluster_10= df[df['cluster'] == 10]\n",
      "cluster_11= df[df['cluster'] == 11]\n",
      "cluster_12= df[df['cluster'] == 12]\n",
      "cluster_13= df[df['cluster'] == 13]\n",
      "cluster_14= df[df['cluster'] == 14]\n",
      "cluster_15= df[df['cluster'] == 15]\n",
      "cluster_16= df[df['cluster'] == 16]\n",
      "cluster_17= df[df['cluster'] == 17]\n",
      "cluster_18= df[df['cluster'] == 18]\n",
      "cluster_19= df[df['cluster'] == 19]\n",
      "cluster_20= df[df['cluster'] == 20]\n",
      "cluster_21= df[df['cluster'] == 21]\n",
      "cluster_22= df[df['cluster'] == 22]\n",
      "cluster_23= df[df['cluster'] == 23]\n",
      "cluster_24= df[df['cluster'] == 24]\n",
      "cluster_25= df[df['cluster'] == 25]\n",
      "cluster_26= df[df['cluster'] == 26]\n",
      "cluster_27= df[df['cluster'] == 27]\n",
      "cluster_28= df[df['cluster'] == 28]\n",
      "cluster_29= df[df['cluster'] == 29]\n",
      "cluster_30= df[df['cluster'] == 30]\n",
      "cluster_31= df[df['cluster'] == 31]\n",
      "cluster_32= df[df['cluster'] == 32]\n",
      "cluster_33= df[df['cluster'] == 33]\n",
      "cluster_34= df[df['cluster'] == 34]\n"
     ]
    }
   ],
   "source": [
    "for i in range(35):\n",
    "    print('cluster_', str(i), \"= df[df['cluster'] == \", str(i)  + \"]\",sep='')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "id": "2df9ccdc-28b4-44e3-9c4c-6cf6e66e4ee2",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>p_val</th>\n",
       "      <th>avg_log2FC</th>\n",
       "      <th>pct.1</th>\n",
       "      <th>pct.2</th>\n",
       "      <th>p_val_adj</th>\n",
       "      <th>cluster</th>\n",
       "      <th>gene</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>21741</th>\n",
       "      <td>PRSS57.12</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>4.241110</td>\n",
       "      <td>0.956</td>\n",
       "      <td>0.121</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>12</td>\n",
       "      <td>PRSS57</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21742</th>\n",
       "      <td>CDK6.9</td>\n",
       "      <td>1.960601e-262</td>\n",
       "      <td>2.607566</td>\n",
       "      <td>0.995</td>\n",
       "      <td>0.439</td>\n",
       "      <td>5.774362e-258</td>\n",
       "      <td>12</td>\n",
       "      <td>CDK6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21743</th>\n",
       "      <td>MPO.10</td>\n",
       "      <td>5.836475e-262</td>\n",
       "      <td>5.031454</td>\n",
       "      <td>0.898</td>\n",
       "      <td>0.121</td>\n",
       "      <td>1.718959e-257</td>\n",
       "      <td>12</td>\n",
       "      <td>MPO</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21744</th>\n",
       "      <td>KCNQ5.11</td>\n",
       "      <td>3.181060e-259</td>\n",
       "      <td>3.317941</td>\n",
       "      <td>0.985</td>\n",
       "      <td>0.275</td>\n",
       "      <td>9.368857e-255</td>\n",
       "      <td>12</td>\n",
       "      <td>KCNQ5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21745</th>\n",
       "      <td>ATP8B4.12</td>\n",
       "      <td>3.950824e-255</td>\n",
       "      <td>2.476954</td>\n",
       "      <td>0.983</td>\n",
       "      <td>0.195</td>\n",
       "      <td>1.163597e-250</td>\n",
       "      <td>12</td>\n",
       "      <td>ATP8B4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23426</th>\n",
       "      <td>OASL.8</td>\n",
       "      <td>9.619843e-03</td>\n",
       "      <td>-2.489880</td>\n",
       "      <td>0.023</td>\n",
       "      <td>0.048</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>12</td>\n",
       "      <td>OASL</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23427</th>\n",
       "      <td>IGLV3-21.6</td>\n",
       "      <td>9.633815e-03</td>\n",
       "      <td>-6.151218</td>\n",
       "      <td>0.014</td>\n",
       "      <td>0.024</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>12</td>\n",
       "      <td>IGLV3-21</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23428</th>\n",
       "      <td>SLC38A11.10</td>\n",
       "      <td>9.829780e-03</td>\n",
       "      <td>-3.905051</td>\n",
       "      <td>0.004</td>\n",
       "      <td>0.018</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>12</td>\n",
       "      <td>SLC38A11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23429</th>\n",
       "      <td>AC246817.1.7</td>\n",
       "      <td>9.870367e-03</td>\n",
       "      <td>-0.301300</td>\n",
       "      <td>0.066</td>\n",
       "      <td>0.037</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>12</td>\n",
       "      <td>AC246817.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23430</th>\n",
       "      <td>IGLV1-47.6</td>\n",
       "      <td>9.890990e-03</td>\n",
       "      <td>-5.992063</td>\n",
       "      <td>0.010</td>\n",
       "      <td>0.016</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>12</td>\n",
       "      <td>IGLV1-47</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1690 rows × 8 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         Unnamed: 0          p_val  avg_log2FC  pct.1  pct.2      p_val_adj  \\\n",
       "21741     PRSS57.12   0.000000e+00    4.241110  0.956  0.121   0.000000e+00   \n",
       "21742        CDK6.9  1.960601e-262    2.607566  0.995  0.439  5.774362e-258   \n",
       "21743        MPO.10  5.836475e-262    5.031454  0.898  0.121  1.718959e-257   \n",
       "21744      KCNQ5.11  3.181060e-259    3.317941  0.985  0.275  9.368857e-255   \n",
       "21745     ATP8B4.12  3.950824e-255    2.476954  0.983  0.195  1.163597e-250   \n",
       "...             ...            ...         ...    ...    ...            ...   \n",
       "23426        OASL.8   9.619843e-03   -2.489880  0.023  0.048   1.000000e+00   \n",
       "23427    IGLV3-21.6   9.633815e-03   -6.151218  0.014  0.024   1.000000e+00   \n",
       "23428   SLC38A11.10   9.829780e-03   -3.905051  0.004  0.018   1.000000e+00   \n",
       "23429  AC246817.1.7   9.870367e-03   -0.301300  0.066  0.037   1.000000e+00   \n",
       "23430    IGLV1-47.6   9.890990e-03   -5.992063  0.010  0.016   1.000000e+00   \n",
       "\n",
       "       cluster        gene  \n",
       "21741       12      PRSS57  \n",
       "21742       12        CDK6  \n",
       "21743       12         MPO  \n",
       "21744       12       KCNQ5  \n",
       "21745       12      ATP8B4  \n",
       "...        ...         ...  \n",
       "23426       12        OASL  \n",
       "23427       12    IGLV3-21  \n",
       "23428       12    SLC38A11  \n",
       "23429       12  AC246817.1  \n",
       "23430       12    IGLV1-47  \n",
       "\n",
       "[1690 rows x 8 columns]"
      ]
     },
     "execution_count": 83,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cluster_12"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "2ac0df0a-db2c-4d74-a67e-918ca0f57282",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "ename": "IndexError",
     "evalue": "index 0 is out of bounds for axis 0 with size 0",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mIndexError\u001b[0m                                Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[18], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mdf\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mloc\u001b[49m\u001b[43m[\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mcluster\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m==\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m12\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m&\u001b[39;49m\u001b[43m \u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mgene\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m==\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mCD3E\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mavg_log2FC\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalues\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\n",
      "\u001b[0;31mIndexError\u001b[0m: index 0 is out of bounds for axis 0 with size 0"
     ]
    }
   ],
   "source": [
    "df.loc[(df['cluster'] == 12) & (df['gene'] == 'CD3E'), 'avg_log2FC'].values[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "ea318ee3-aeb6-42cd-9b7f-80fc6278c37f",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "singnalPos = ['+',  '++',  '+++',  'lo', 'mid', 'hi']\n",
    "singnalNeg = ['-']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "id": "bdc3d080-e49b-49e3-bf76-1f2ed69ff516",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "def marker_result(exit, clusterType):\n",
    "    cellTypeNumDict = {}\n",
    "    count = 0\n",
    "    for x in exit:\n",
    "        for cell, markerListList in markers_dict.items():\n",
    "            for markerList in markerListList:\n",
    "                \n",
    "                if x in markerList[0]:\n",
    "                    FC = df.loc[(df['cluster'] == clusterType) & (df['gene'] == x), 'avg_log2FC'].values[0]\n",
    "                    pct1 = df.loc[(df['cluster'] == clusterType) & (df['gene'] == x), 'pct.1'].values[0]\n",
    "                    pct2 = df.loc[(df['cluster'] == clusterType) & (df['gene'] == x), 'pct.2'].values[0]\n",
    "                    p_val = df.loc[(df['cluster'] == clusterType) & (df['gene'] == x), 'p_val'].values[0]\n",
    "                    state = markerList[1]\n",
    "                    if (state in singnalPos and FC > 0) or (state in singnalNeg and FC < 0):\n",
    "                        if cell not in cellTypeNumDict.keys():\n",
    "                            cellTypeNumDict[cell] = 1\n",
    "\n",
    "                            cellTypeNum = len(markers_dict[cell])\n",
    "                        else:\n",
    "                            cellTypeNumDict[cell] += 1\n",
    "                        count += 1\n",
    "                        print(count, 'marker:',x, '    cell type:', cell, '   cellTypeNum:', cellTypeNum,'   state:', state, '    FC:', round(FC, 5), '    pct.1:', round(pct1, 5), '    pct.2:', round(pct2, 5) ,'   p_val:',p_val)\n",
    "                        \n",
    "    for cell, num in cellTypeNumDict.items():\n",
    "        print(\"cell Type:\", cell, \"  cellTypeNum:\", cellTypeNum, '  markersNum:', num, '  percent:', round(num/cellTypeNum, 4))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "id": "29b455cb-5f77-42cd-904d-1d692485b94d",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1 marker: SDC1     cell type: Plasma_Cells    cellTypeNum: 3    state: +     FC: 2.39052     pct.1: 0.79     pct.2: 0.136    p_val: 1.92754551064776e-174\n",
      "2 marker: SDC1     cell type: Adipocyte    cellTypeNum: 2    state: +     FC: 2.39052     pct.1: 0.79     pct.2: 0.136    p_val: 1.92754551064776e-174\n",
      "3 marker: CD38     cell type: GMP    cellTypeNum: 5    state: +     FC: 1.68066     pct.1: 0.946     pct.2: 0.301    p_val: 1.05603544619442e-150\n",
      "4 marker: CD38     cell type: Plasma_Cells    cellTypeNum: 5    state: +++     FC: 1.68066     pct.1: 0.946     pct.2: 0.301    p_val: 1.05603544619442e-150\n",
      "5 marker: CD79A     cell type: Immature_B_Cell    cellTypeNum: 3    state: +     FC: 0.95069     pct.1: 0.728     pct.2: 0.185    p_val: 6.41621419277449e-93\n",
      "6 marker: CD79A     cell type: B_Cells    cellTypeNum: 3    state: +     FC: 0.95069     pct.1: 0.728     pct.2: 0.185    p_val: 6.41621419277449e-93\n",
      "7 marker: CD79A     cell type: Plasma_Cells    cellTypeNum: 3    state: +     FC: 0.95069     pct.1: 0.728     pct.2: 0.185    p_val: 6.41621419277449e-93\n",
      "8 marker: PTPRC     cell type: HSC    cellTypeNum: 5    state: -     FC: -6.41387     pct.1: 0.061     pct.2: 0.403    p_val: 7.19315908716825e-83\n",
      "9 marker: PTPRC     cell type: GMP    cellTypeNum: 5    state: -     FC: -6.41387     pct.1: 0.061     pct.2: 0.403    p_val: 7.19315908716825e-83\n",
      "10 marker: CD34     cell type: pDC    cellTypeNum: 2    state: -     FC: -6.8449     pct.1: 0.007     pct.2: 0.176    p_val: 5.711216559574439e-40\n",
      "11 marker: CXCL12     cell type: SEC    cellTypeNum: 3    state: -     FC: -6.80677     pct.1: 0.164     pct.2: 0.347    p_val: 7.70670146820071e-38\n",
      "12 marker: CDH5     cell type: VSMC    cellTypeNum: 2    state: -     FC: -6.99487     pct.1: 0.004     pct.2: 0.071    p_val: 4.5084828116223495e-12\n",
      "13 marker: GATA1     cell type: GATA1neg_Mks    cellTypeNum: 3    state: -     FC: -5.76521     pct.1: 0.005     pct.2: 0.054    p_val: 1.04172967715839e-11\n",
      "14 marker: PAX5     cell type: B_Cells    cellTypeNum: 3    state: -     FC: -3.24593     pct.1: 0.042     pct.2: 0.069    p_val: 0.0002640822992166\n",
      "cell Type: Plasma_Cells   cellTypeNum: 3   markersNum: 3   percent: 1.0\n",
      "cell Type: Adipocyte   cellTypeNum: 3   markersNum: 1   percent: 0.3333\n",
      "cell Type: GMP   cellTypeNum: 3   markersNum: 2   percent: 0.6667\n",
      "cell Type: Immature_B_Cell   cellTypeNum: 3   markersNum: 1   percent: 0.3333\n",
      "cell Type: B_Cells   cellTypeNum: 3   markersNum: 2   percent: 0.6667\n",
      "cell Type: HSC   cellTypeNum: 3   markersNum: 1   percent: 0.3333\n",
      "cell Type: pDC   cellTypeNum: 3   markersNum: 1   percent: 0.3333\n",
      "cell Type: SEC   cellTypeNum: 3   markersNum: 1   percent: 0.3333\n",
      "cell Type: VSMC   cellTypeNum: 3   markersNum: 1   percent: 0.3333\n",
      "cell Type: GATA1neg_Mks   cellTypeNum: 3   markersNum: 1   percent: 0.3333\n"
     ]
    }
   ],
   "source": [
    "def get_annotation(clusterType):\n",
    "    geneList = unique_genes\n",
    "    clusterSelect= df[df['cluster'] == clusterType]\n",
    "    # clusterSelect = clusterSelect[clusterSelect['avg_log2FC'] > 0.25]\n",
    "    exit = [x for x in clusterSelect['gene'].tolist() if x in geneList ]\n",
    "    marker_result(exit, clusterType)\n",
    "    \n",
    "clusterType = 0    \n",
    "get_annotation(clusterType)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f42508b7-b178-4a80-bc36-31dbb81aa7d8",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "id": "e6b1d50c-b51b-4490-8d55-6154595e4b9d",
   "metadata": {},
   "source": [
    "## 不带 +  -方向"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "id": "81615391-9eb2-4045-9ec3-f0d56a75d635",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "markers_dict = {\n",
    "    \n",
    "    # 造血干细胞及祖细胞\n",
    "    \"HSC\": [['CD3E', '-'], ['CD19', '-'], ['NCAM1', '-'], ['CD14', '-'], ['CEACAM8', '-'], [\"CD34\",\"+\"], [\"CD38\", \"-\"], [\"PTPRC\", \"-\"], [\"THY1\", \"+\"]],   # +，++，+++，-，lo，mid，hi, lo/mid\n",
    "    \"SPINK2_HSPC\": [['CD3E', '-'], ['CD19', '-'], ['NCAM1', '-'], ['CD14', '-'], ['CEACAM8', '-'], [\"SPINK2\", \"+\"], [\"CD34\", \"+\"]],\n",
    "    \"HSPC\": [['CD3E', '-'], ['CD19', '-'], ['NCAM1', '-'], ['CD14', '-'], ['CEACAM8', '-'], [\"SPINK2\", \"-\"], [\"CD34\",\"+\"]],\n",
    "    \"GMP\": [['CD3E', '-'], ['CD19', '-'], ['NCAM1', '-'], ['CD14', '-'], ['CEACAM8', '-'], [\"CD34\", \"+\"], [\"CD38\", \"+\"], [\"THY1\", \"+\"], [\"PTPRC\", \"-\"]],\n",
    "    \"GMP_Myeloblast\": [['CD3E', '-'], ['CD19', '-'], ['NCAM1', '-'], ['CD14', '-'], ['CEACAM8', '-'], [\"CD34\", \"+\"], [\"CD33\", \"+\"]],\n",
    "    \"Early_Myeloid_Progenitor\": [[\"CD33\", \"+\"], [\"MPO\", \"+\"], [\"ITGAM\", \"lo\"]],\n",
    "    \"Intermediate_Myeloid\": [[\"CD33\", \"mid\"], [\"MPO\", \"mid\"], [\"ITGAM\", \"+\"]],\n",
    "    \"Mature_Myeloid\": [[\"CD33\", \"lo\"], [\"MPO\", \"lo\"], [\"ITGAM\", \"+\"], [\"THBD\", \"+\"]],\n",
    "    \n",
    "    # 髓系细胞\n",
    "    \"Monocytes\": [[\"CD14\", \"+\"]],\n",
    "    \"Non_Classical_Monocyte\": [[\"CD14\", \"lo\"], [\"ITGAX\", \"+\"], [\"HLA-DRA\", \"++\"]],\n",
    "    \"Macrophages\": [[\"VCAM1\", \"+\"], [\"CD68\", \"+\"], [\"CD163\", \"+\"]],\n",
    "    \"pDC\": [[\"IL3RA\", \"+\"], [\"CD34\", \"-\"]],\n",
    "    \n",
    "    # 淋系细胞\n",
    "    \"CLP\": [['CD3E', '-'], ['CD19', '-'], ['NCAM1', '-'], ['CD14', '-'], ['CEACAM8', '-'], [\"CD34\", \"+\"], [\"CD38\", \"-\"], [\"PTPRC\", \"+\"]],\n",
    "    \"Immature_B_Cell\": [[\"PAX5\", \"+\"], [\"CD79A\", \"+\"], [\"CD38\", \"lo/mid\"]],\n",
    "    \"B_Cells\": [[\"PAX5\", \"-\"], [\"CD79A\", \"+\"], [\"CD38\", \"lo/mid\"]],\n",
    "    \"CD4_T_Cell\": [[\"CD3E\", \"+\"], [\"CD4\", \"+\"]],\n",
    "    \"CD8_T_Cell\": [[\"CD3E\", \"+\"], [\"CD8A\", \"+\"]],\n",
    "    \"Plasma_Cells\": [[\"CD79A\", \"+\"], [\"CD38\", \"+++\"], [\"SDC1\", \"+\"]],\n",
    "    \n",
    "    # 红系/巨核系\n",
    "    \"MEP_Early_Erythroblast\": [['CD3E', '-'], ['CD19', '-'], ['NCAM1', '-'], ['CD14', '-'], ['CEACAM8', '-'], [\"CD34\", \"+\"], [\"GATA1\", \"+\"]],\n",
    "    \"CD34_CD61\": [['CD3E', '-'], ['CD19', '-'], ['NCAM1', '-'], ['CD14', '-'], ['CEACAM8', '-'], [\"CD34\", \"+\"], [\"ITGB3\", \"+\"]],\n",
    "    \"Erythroblast\": [[\"GATA1\", \"+\"], [\"TFRC\", \"+\"], [\"GYPC\", \"+\"]],\n",
    "    \"Erythroid\": [[\"GYPC\", \"+\"], [\"TFRC\", \"+\"]],\n",
    "    \"GATA1neg_Mks\": [[\"GATA1\", \"-\"], [\"ITGB3\", \"+\"], [\"TGFB1\", \"+\"]],\n",
    "    \"GATA1pos_Mks\": [[\"GATA1\", \"+\"], [\"ITGB3\", \"+\"], [\"TGFB1\", \"+\"]],\n",
    "    \n",
    "    # 间充质/基质细胞\n",
    "    \"Adipo_MSC\": [[\"FOXC1\", \"+\"], [\"CXCL12\", \"+\"], [\"THY1\",\"lo\"]],\n",
    "    \"THY1_MSC\": [[\"FOXC1\", \"+\"], [\"CXCL12\", \"+\"], [\"THY1\", \"hi\"]],\n",
    "    \"Adipocyte\": [[\"MCAM\", \"+\"], [\"SDC1\", \"+\"]],\n",
    "    \"Endosteal\": [[\"NCAM1\", \"+\"], [\"VIM\", \"+\"], [\"cluster_spatial_location\", \".\"]],\n",
    "    \"AEC\": [[\"CXCL12\", \"+\"], [\"CDH5\", \"+\"]],\n",
    "    \"SEC\": [[\"CDH5\", \"+\"], [\"CD34\", \"+\"], [\"CXCL12\", \"-\"]],\n",
    "    \"VSMC\": [[\"ACTA2\", \"+\"], [\"CDH5\", \"-\"]],\n",
    "    \"Schwann_Cell\": [[\"PLP1\", \"+\"], [\"NGFR\", \"+\"]]\n",
    "}\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 130,
   "id": "01d3bac0-b606-42ce-9014-7e75bf1b1d7b",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "7"
      ]
     },
     "execution_count": 130,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(markers_dict['SPINK2_HSPC'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 156,
   "id": "58efed26-9037-4006-ba81-6ab0f25af98e",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "def marker_result_no_direction(exit, clusterType):\n",
    "    cellTypeNumDict = {}\n",
    "    count = 0\n",
    "    for x in exit:\n",
    "        for cell, markerListList in markers_dict.items():\n",
    "            for markerList in markerListList:\n",
    "\n",
    "                if x in markerList[0]:\n",
    "                    try:\n",
    "                        dfMiddle = df[df['avg_log2FC'].abs() >= 1]\n",
    "                        FC = dfMiddle.loc[(dfMiddle['cluster'] == clusterType) & (dfMiddle['gene'] == x), 'avg_log2FC'].values[0]\n",
    "                        pct1 =dfMiddle.loc[(dfMiddle['cluster'] == clusterType) & (dfMiddle['gene'] == x), 'pct.1'].values[0]\n",
    "                        pct2 = dfMiddle.loc[(dfMiddle['cluster'] == clusterType) & (dfMiddle['gene'] == x), 'pct.2'].values[0]\n",
    "                        p_val = dfMiddle.loc[(dfMiddle['cluster'] == clusterType) & (dfMiddle['gene'] == x), 'p_val'].values[0]\n",
    "                        state = markerList[1]  # + -\n",
    "                        cellTypeMarkerNum = len(markers_dict[cell])\n",
    "                        # if (state in singnalPos and FC > 0) or (state in singnalNeg and FC < 0):\n",
    "                        if cell not in cellTypeNumDict.keys():\n",
    "                            cellTypeNumDict[cell] = 1\n",
    "                        else:\n",
    "                            cellTypeNumDict[cell] += 1\n",
    "                        count += 1\n",
    "                        # print(count, 'marker:',x, '    cell type:', cell, '   cellTypeMarkerNum:', cellTypeMarkerNum,'   state:', state, '    FC:', round(FC, 5), '    pct.1:', round(pct1, 5), '    pct.2:', round(pct2, 5) ,'   p_val:',p_val)\n",
    "                    except:\n",
    "                        pass\n",
    "    # print(len(cellTypeNumDict))\n",
    "    count_1 = 0\n",
    "    for cell, num in cellTypeNumDict.items():\n",
    "        cellTypeMarkerNum = len(markers_dict[cell])\n",
    "        # print(\"cell Type:\", cell, \"  cellTypeMarkerNum:\", cellTypeMarkerNum, '  markersNum:', num, '  percent:', round(num/cellTypeMarkerNum, 4))\n",
    "        if num/cellTypeMarkerNum == 1:\n",
    "            count_1 += 1\n",
    "    print('clusterType:', clusterType, '  完全匹配数量:', count_1)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 153,
   "id": "ed2c7211-27dd-4c08-9ba3-be4cbf2dc69e",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "clusterType: 0   完全匹配数量: 10\n"
     ]
    }
   ],
   "source": [
    "def get_annotation(clusterType):\n",
    "    geneList = unique_genes\n",
    "    clusterSelect= df[df['cluster'] == clusterType]\n",
    "    # clusterSelect = clusterSelect[clusterSelect['avg_log2FC'] > 0.25]\n",
    "    exit = [x for x in clusterSelect['gene'].tolist() if x in geneList ]\n",
    "    marker_result_no_direction(exit, clusterType)\n",
    "    \n",
    "clusterType = 0    \n",
    "get_annotation(clusterType)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 154,
   "id": "e400eb89-d8d2-4a81-b891-6cef4c57ea4c",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "clusterType: 0   完全匹配数量: 10\n",
      "clusterType: 1   完全匹配数量: 12\n",
      "clusterType: 2   完全匹配数量: 14\n",
      "clusterType: 3   完全匹配数量: 13\n",
      "clusterType: 4   完全匹配数量: 14\n",
      "clusterType: 5   完全匹配数量: 12\n",
      "clusterType: 6   完全匹配数量: 13\n",
      "clusterType: 7   完全匹配数量: 12\n",
      "clusterType: 8   完全匹配数量: 14\n",
      "clusterType: 9   完全匹配数量: 9\n",
      "clusterType: 10   完全匹配数量: 24\n",
      "clusterType: 11   完全匹配数量: 14\n",
      "clusterType: 12   完全匹配数量: 10\n",
      "clusterType: 13   完全匹配数量: 24\n",
      "clusterType: 14   完全匹配数量: 17\n",
      "clusterType: 15   完全匹配数量: 27\n",
      "clusterType: 16   完全匹配数量: 21\n",
      "clusterType: 17   完全匹配数量: 6\n",
      "clusterType: 18   完全匹配数量: 28\n",
      "clusterType: 19   完全匹配数量: 6\n",
      "clusterType: 20   完全匹配数量: 11\n",
      "clusterType: 21   完全匹配数量: 21\n",
      "clusterType: 22   完全匹配数量: 8\n",
      "clusterType: 23   完全匹配数量: 11\n",
      "clusterType: 24   完全匹配数量: 21\n",
      "clusterType: 25   完全匹配数量: 14\n",
      "clusterType: 26   完全匹配数量: 22\n",
      "clusterType: 27   完全匹配数量: 9\n",
      "clusterType: 28   完全匹配数量: 16\n",
      "clusterType: 29   完全匹配数量: 19\n",
      "clusterType: 30   完全匹配数量: 17\n",
      "clusterType: 31   完全匹配数量: 14\n",
      "clusterType: 32   完全匹配数量: 21\n",
      "clusterType: 33   完全匹配数量: 14\n",
      "clusterType: 34   完全匹配数量: 4\n",
      "clusterType: 35   完全匹配数量: 7\n",
      "clusterType: 36   完全匹配数量: 4\n",
      "clusterType: 37   完全匹配数量: 9\n",
      "clusterType: 38   完全匹配数量: 0\n",
      "clusterType: 39   完全匹配数量: 15\n",
      "clusterType: 40   完全匹配数量: 3\n",
      "clusterType: 41   完全匹配数量: 6\n",
      "clusterType: 42   完全匹配数量: 2\n",
      "clusterType: 43   完全匹配数量: 3\n",
      "clusterType: 44   完全匹配数量: 3\n"
     ]
    }
   ],
   "source": [
    "for cluster_type in range(45):\n",
    "    get_annotation(clusterType = cluster_type)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7678cc91-ca38-46d7-980d-0732ba7b93fa",
   "metadata": {},
   "source": [
    "## 带 + - 方向"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "id": "7835f23c-3fd0-42a2-b617-c937d54cd716",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "singnalPos = ['+',  '++',  '+++',  'lo', 'mid', 'hi']\n",
    "singnalNeg = ['-']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "id": "ecf06318-d0ea-48a8-9dfe-7abfe9878c10",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "def marker_result_direction(exit, clusterType):\n",
    "    cellTypeNumDict = {}\n",
    "    count = 0\n",
    "    for x in exit:\n",
    "        for cell, markerListList in markers_dict.items():\n",
    "            for markerList in markerListList:\n",
    "                if x in markerList[0]:\n",
    "                    try:\n",
    "                        dfMiddle = df[df['avg_log2FC'].abs() >= 0.25]\n",
    "                        FC = dfMiddle.loc[(dfMiddle['cluster'] == clusterType) & (dfMiddle['gene'] == x), 'avg_log2FC'].values[0]\n",
    "                        pct1 =dfMiddle.loc[(dfMiddle['cluster'] == clusterType) & (dfMiddle['gene'] == x), 'pct.1'].values[0]\n",
    "                        pct2 = dfMiddle.loc[(dfMiddle['cluster'] == clusterType) & (dfMiddle['gene'] == x), 'pct.2'].values[0]\n",
    "                        p_val = dfMiddle.loc[(dfMiddle['cluster'] == clusterType) & (dfMiddle['gene'] == x), 'p_val'].values[0]\n",
    "                        state = markerList[1]  # + -\n",
    "                        cellTypeMarkerNum = len(markers_dict[cell])\n",
    "                        if (state in singnalPos and FC > 0) or (state in singnalNeg and FC < 0):\n",
    "                            if cell not in cellTypeNumDict.keys():\n",
    "                                cellTypeNumDict[cell] = 1\n",
    "                            else:\n",
    "                                cellTypeNumDict[cell] += 1\n",
    "                            count += 1\n",
    "                            print(count, 'marker:',x, '    cell type:', cell, '   cellTypeMarkerNum:', cellTypeMarkerNum,'   state:', state, '    FC:', round(FC, 5), '    pct.1:', round(pct1, 5), '    pct.2:', round(pct2, 5) ,'   p_val:',p_val)\n",
    "                    except:\n",
    "                        pass\n",
    "    # print(len(cellTypeNumDict))\n",
    "    count_1 = 0\n",
    "    cells = []\n",
    "    for cell, num in cellTypeNumDict.items():\n",
    "        cellTypeMarkerNum = len(markers_dict[cell])\n",
    "        print(\"cell Type:\", cell, \"  cellTypeMarkerNum:\", cellTypeMarkerNum, '  markersNum:', num, '  percent:', round(num/cellTypeMarkerNum, 4))\n",
    "        if num/cellTypeMarkerNum == 1:\n",
    "            count_1 += 1\n",
    "            cells.append(cell)\n",
    "    print('clusterType:', clusterType, '  完全匹配数量:', count_1,  '   细胞类型:', ','.join(cells))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "id": "4ee38168-86bb-478f-aad7-8cfc91dcbf5b",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "clusterType: 10   完全匹配数量: 0    细胞类型: \n"
     ]
    }
   ],
   "source": [
    "def get_annotation2(clusterType):\n",
    "    geneList = unique_genes\n",
    "    clusterSelect= df[df['cluster'] == clusterType]\n",
    "    # clusterSelect = clusterSelect[clusterSelect['avg_log2FC'] > 0.25]\n",
    "    exit = [x for x in clusterSelect['gene'].tolist() if x in geneList ]\n",
    "    marker_result_direction(exit, clusterType)\n",
    "    \n",
    "clusterType = 10  \n",
    "get_annotation2(clusterType)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "id": "c737fdec-1f64-4996-addc-bdccc115d5a5",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "clusterType: 17   完全匹配数量: 2    细胞类型: Erythroblast,Erythroid\n"
     ]
    }
   ],
   "source": [
    "clusterType = 17\n",
    "get_annotation2(clusterType)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "id": "ec8381ce-03a7-455f-88f8-87feec0399c5",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "clusterType: 0   完全匹配数量: 1    细胞类型: Plasma_Cells\n",
      "clusterType: 1   完全匹配数量: 3    细胞类型: Adipo_MSC,THY1_MSC,Monocytes\n",
      "clusterType: 2   完全匹配数量: 1    细胞类型: Plasma_Cells\n",
      "clusterType: 3   完全匹配数量: 0    细胞类型: \n",
      "clusterType: 4   完全匹配数量: 1    细胞类型: Monocytes\n",
      "clusterType: 5   完全匹配数量: 3    细胞类型: Adipo_MSC,THY1_MSC,Schwann_Cell\n",
      "clusterType: 6   完全匹配数量: 0    细胞类型: \n",
      "clusterType: 7   完全匹配数量: 1    细胞类型: Erythroid\n",
      "clusterType: 8   完全匹配数量: 2    细胞类型: VSMC,Monocytes\n",
      "clusterType: 9   完全匹配数量: 5    细胞类型: MEP_Early_Erythroblast,Erythroblast,Erythroid,HSPC,CD34_CD61\n",
      "clusterType: 10   完全匹配数量: 0    细胞类型: \n",
      "clusterType: 11   完全匹配数量: 1    细胞类型: Plasma_Cells\n",
      "clusterType: 12   完全匹配数量: 1    细胞类型: Erythroid\n",
      "clusterType: 13   完全匹配数量: 1    细胞类型: CD8_T_Cell\n",
      "clusterType: 14   完全匹配数量: 0    细胞类型: \n",
      "clusterType: 15   完全匹配数量: 2    细胞类型: Erythroblast,Erythroid\n",
      "clusterType: 16   完全匹配数量: 0    细胞类型: \n",
      "clusterType: 17   完全匹配数量: 2    细胞类型: Erythroblast,Erythroid\n",
      "clusterType: 18   完全匹配数量: 2    细胞类型: CD4_T_Cell,CD8_T_Cell\n",
      "clusterType: 19   完全匹配数量: 2    细胞类型: Non_Classical_Monocyte,Monocytes\n",
      "clusterType: 20   完全匹配数量: 0    细胞类型: \n",
      "clusterType: 21   完全匹配数量: 2    细胞类型: Early_Myeloid_Progenitor,Intermediate_Myeloid\n",
      "clusterType: 22   完全匹配数量: 0    细胞类型: \n",
      "clusterType: 23   完全匹配数量: 1    细胞类型: AEC\n",
      "clusterType: 24   完全匹配数量: 3    细胞类型: Non_Classical_Monocyte,Monocytes,pDC\n",
      "clusterType: 25   完全匹配数量: 3    细胞类型: Adipo_MSC,THY1_MSC,VSMC\n",
      "clusterType: 26   完全匹配数量: 0    细胞类型: \n",
      "clusterType: 27   完全匹配数量: 0    细胞类型: \n",
      "clusterType: 28   完全匹配数量: 1    细胞类型: pDC\n",
      "clusterType: 29   完全匹配数量: 0    细胞类型: \n",
      "clusterType: 30   完全匹配数量: 1    细胞类型: Plasma_Cells\n",
      "clusterType: 31   完全匹配数量: 1    细胞类型: VSMC\n",
      "clusterType: 32   完全匹配数量: 0    细胞类型: \n",
      "clusterType: 33   完全匹配数量: 0    细胞类型: \n",
      "clusterType: 34   完全匹配数量: 1    细胞类型: Plasma_Cells\n",
      "clusterType: 35   完全匹配数量: 1    细胞类型: Erythroid\n",
      "clusterType: 36   完全匹配数量: 0    细胞类型: \n",
      "clusterType: 37   完全匹配数量: 0    细胞类型: \n",
      "clusterType: 38   完全匹配数量: 1    细胞类型: Erythroid\n",
      "clusterType: 39   完全匹配数量: 2    细胞类型: Erythroblast,Erythroid\n",
      "clusterType: 40   完全匹配数量: 0    细胞类型: \n",
      "clusterType: 41   完全匹配数量: 2    细胞类型: CD8_T_Cell,Plasma_Cells\n",
      "clusterType: 42   完全匹配数量: 2    细胞类型: Adipocyte,Plasma_Cells\n",
      "clusterType: 43   完全匹配数量: 1    细胞类型: Monocytes\n",
      "clusterType: 44   完全匹配数量: 1    细胞类型: Plasma_Cells\n"
     ]
    }
   ],
   "source": [
    "for cluster_type in range(45):\n",
    "    get_annotation2(clusterType = cluster_type)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "id": "6baecfd1-9536-4e34-9d1e-72fa56ab08d3",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1 marker: CD3E     cell type: CD4_T_Cell    cellTypeMarkerNum: 2    state: +     FC: 4.88122     pct.1: 0.867     pct.2: 0.035    p_val: 3.85240404553089e-281\n",
      "2 marker: CD3E     cell type: CD8_T_Cell    cellTypeMarkerNum: 2    state: +     FC: 4.88122     pct.1: 0.867     pct.2: 0.035    p_val: 3.85240404553089e-281\n",
      "3 marker: PTPRC     cell type: CLP    cellTypeMarkerNum: 8    state: +     FC: 2.03594     pct.1: 0.984     pct.2: 0.362    p_val: 3.85758290993667e-210\n",
      "4 marker: CD38     cell type: HSC    cellTypeMarkerNum: 9    state: -     FC: -3.08213     pct.1: 0.054     pct.2: 0.357    p_val: 5.5341366911447205e-67\n",
      "5 marker: CD38     cell type: CLP    cellTypeMarkerNum: 8    state: -     FC: -3.08213     pct.1: 0.054     pct.2: 0.357    p_val: 5.5341366911447205e-67\n",
      "6 marker: TGFB1     cell type: GATA1neg_Mks    cellTypeMarkerNum: 3    state: +     FC: 1.12776     pct.1: 0.795     pct.2: 0.582    p_val: 1.03307295201289e-56\n",
      "7 marker: TGFB1     cell type: GATA1pos_Mks    cellTypeMarkerNum: 3    state: +     FC: 1.12776     pct.1: 0.795     pct.2: 0.582    p_val: 1.03307295201289e-56\n",
      "8 marker: CD4     cell type: CD4_T_Cell    cellTypeMarkerNum: 2    state: +     FC: 2.2153     pct.1: 0.368     pct.2: 0.125    p_val: 5.58640403654595e-50\n",
      "9 marker: CXCL12     cell type: SEC    cellTypeMarkerNum: 3    state: -     FC: -4.8332     pct.1: 0.088     pct.2: 0.339    p_val: 1.3158470519821601e-45\n",
      "10 marker: CD34     cell type: pDC    cellTypeMarkerNum: 2    state: -     FC: -5.80058     pct.1: 0.004     pct.2: 0.166    p_val: 4.35904481256707e-39\n",
      "11 marker: NCAM1     cell type: HSC    cellTypeMarkerNum: 9    state: -     FC: -5.57839     pct.1: 0.012     pct.2: 0.165    p_val: 2.3457703992854803e-31\n",
      "12 marker: NCAM1     cell type: SPINK2_HSPC    cellTypeMarkerNum: 7    state: -     FC: -5.57839     pct.1: 0.012     pct.2: 0.165    p_val: 2.3457703992854803e-31\n",
      "13 marker: NCAM1     cell type: HSPC    cellTypeMarkerNum: 7    state: -     FC: -5.57839     pct.1: 0.012     pct.2: 0.165    p_val: 2.3457703992854803e-31\n",
      "14 marker: NCAM1     cell type: GMP    cellTypeMarkerNum: 9    state: -     FC: -5.57839     pct.1: 0.012     pct.2: 0.165    p_val: 2.3457703992854803e-31\n",
      "15 marker: NCAM1     cell type: GMP_Myeloblast    cellTypeMarkerNum: 7    state: -     FC: -5.57839     pct.1: 0.012     pct.2: 0.165    p_val: 2.3457703992854803e-31\n",
      "16 marker: NCAM1     cell type: CLP    cellTypeMarkerNum: 8    state: -     FC: -5.57839     pct.1: 0.012     pct.2: 0.165    p_val: 2.3457703992854803e-31\n",
      "17 marker: NCAM1     cell type: MEP_Early_Erythroblast    cellTypeMarkerNum: 7    state: -     FC: -5.57839     pct.1: 0.012     pct.2: 0.165    p_val: 2.3457703992854803e-31\n",
      "18 marker: NCAM1     cell type: CD34_CD61    cellTypeMarkerNum: 7    state: -     FC: -5.57839     pct.1: 0.012     pct.2: 0.165    p_val: 2.3457703992854803e-31\n",
      "19 marker: PAX5     cell type: B_Cells    cellTypeMarkerNum: 3    state: -     FC: -5.72366     pct.1: 0.002     pct.2: 0.068    p_val: 1.86581662943841e-20\n",
      "20 marker: CDH5     cell type: VSMC    cellTypeMarkerNum: 2    state: -     FC: -6.1037     pct.1: 0.002     pct.2: 0.067    p_val: 1.47280679672145e-16\n",
      "21 marker: CD8A     cell type: CD8_T_Cell    cellTypeMarkerNum: 2    state: +     FC: 1.8033     pct.1: 0.115     pct.2: 0.022    p_val: 7.860639477050371e-15\n",
      "22 marker: CD14     cell type: HSC    cellTypeMarkerNum: 9    state: -     FC: -4.33164     pct.1: 0.004     pct.2: 0.066    p_val: 1.27405047548218e-13\n",
      "23 marker: CD14     cell type: SPINK2_HSPC    cellTypeMarkerNum: 7    state: -     FC: -4.33164     pct.1: 0.004     pct.2: 0.066    p_val: 1.27405047548218e-13\n",
      "24 marker: CD14     cell type: HSPC    cellTypeMarkerNum: 7    state: -     FC: -4.33164     pct.1: 0.004     pct.2: 0.066    p_val: 1.27405047548218e-13\n",
      "25 marker: CD14     cell type: GMP    cellTypeMarkerNum: 9    state: -     FC: -4.33164     pct.1: 0.004     pct.2: 0.066    p_val: 1.27405047548218e-13\n",
      "26 marker: CD14     cell type: GMP_Myeloblast    cellTypeMarkerNum: 7    state: -     FC: -4.33164     pct.1: 0.004     pct.2: 0.066    p_val: 1.27405047548218e-13\n",
      "27 marker: CD14     cell type: CLP    cellTypeMarkerNum: 8    state: -     FC: -4.33164     pct.1: 0.004     pct.2: 0.066    p_val: 1.27405047548218e-13\n",
      "28 marker: CD14     cell type: MEP_Early_Erythroblast    cellTypeMarkerNum: 7    state: -     FC: -4.33164     pct.1: 0.004     pct.2: 0.066    p_val: 1.27405047548218e-13\n",
      "29 marker: CD14     cell type: CD34_CD61    cellTypeMarkerNum: 7    state: -     FC: -4.33164     pct.1: 0.004     pct.2: 0.066    p_val: 1.27405047548218e-13\n",
      "30 marker: GATA1     cell type: GATA1neg_Mks    cellTypeMarkerNum: 3    state: -     FC: -3.98967     pct.1: 0.004     pct.2: 0.051    p_val: 7.819595514626829e-12\n",
      "31 marker: CD19     cell type: HSC    cellTypeMarkerNum: 9    state: -     FC: -4.51821     pct.1: 0.0     pct.2: 0.037    p_val: 4.66350082051688e-10\n",
      "32 marker: CD19     cell type: SPINK2_HSPC    cellTypeMarkerNum: 7    state: -     FC: -4.51821     pct.1: 0.0     pct.2: 0.037    p_val: 4.66350082051688e-10\n",
      "33 marker: CD19     cell type: HSPC    cellTypeMarkerNum: 7    state: -     FC: -4.51821     pct.1: 0.0     pct.2: 0.037    p_val: 4.66350082051688e-10\n",
      "34 marker: CD19     cell type: GMP    cellTypeMarkerNum: 9    state: -     FC: -4.51821     pct.1: 0.0     pct.2: 0.037    p_val: 4.66350082051688e-10\n",
      "35 marker: CD19     cell type: GMP_Myeloblast    cellTypeMarkerNum: 7    state: -     FC: -4.51821     pct.1: 0.0     pct.2: 0.037    p_val: 4.66350082051688e-10\n",
      "36 marker: CD19     cell type: CLP    cellTypeMarkerNum: 8    state: -     FC: -4.51821     pct.1: 0.0     pct.2: 0.037    p_val: 4.66350082051688e-10\n",
      "37 marker: CD19     cell type: MEP_Early_Erythroblast    cellTypeMarkerNum: 7    state: -     FC: -4.51821     pct.1: 0.0     pct.2: 0.037    p_val: 4.66350082051688e-10\n",
      "38 marker: CD19     cell type: CD34_CD61    cellTypeMarkerNum: 7    state: -     FC: -4.51821     pct.1: 0.0     pct.2: 0.037    p_val: 4.66350082051688e-10\n",
      "39 marker: SPINK2     cell type: HSPC    cellTypeMarkerNum: 7    state: -     FC: -1.92836     pct.1: 0.062     pct.2: 0.146    p_val: 6.791220673661631e-09\n",
      "40 marker: CEACAM8     cell type: HSC    cellTypeMarkerNum: 9    state: -     FC: -5.42222     pct.1: 0.004     pct.2: 0.032    p_val: 2.85655796868491e-06\n",
      "41 marker: CEACAM8     cell type: SPINK2_HSPC    cellTypeMarkerNum: 7    state: -     FC: -5.42222     pct.1: 0.004     pct.2: 0.032    p_val: 2.85655796868491e-06\n",
      "42 marker: CEACAM8     cell type: HSPC    cellTypeMarkerNum: 7    state: -     FC: -5.42222     pct.1: 0.004     pct.2: 0.032    p_val: 2.85655796868491e-06\n",
      "43 marker: CEACAM8     cell type: GMP    cellTypeMarkerNum: 9    state: -     FC: -5.42222     pct.1: 0.004     pct.2: 0.032    p_val: 2.85655796868491e-06\n",
      "44 marker: CEACAM8     cell type: GMP_Myeloblast    cellTypeMarkerNum: 7    state: -     FC: -5.42222     pct.1: 0.004     pct.2: 0.032    p_val: 2.85655796868491e-06\n",
      "45 marker: CEACAM8     cell type: CLP    cellTypeMarkerNum: 8    state: -     FC: -5.42222     pct.1: 0.004     pct.2: 0.032    p_val: 2.85655796868491e-06\n",
      "46 marker: CEACAM8     cell type: MEP_Early_Erythroblast    cellTypeMarkerNum: 7    state: -     FC: -5.42222     pct.1: 0.004     pct.2: 0.032    p_val: 2.85655796868491e-06\n",
      "47 marker: CEACAM8     cell type: CD34_CD61    cellTypeMarkerNum: 7    state: -     FC: -5.42222     pct.1: 0.004     pct.2: 0.032    p_val: 2.85655796868491e-06\n",
      "cell Type: CD4_T_Cell   cellTypeMarkerNum: 2   markersNum: 2   percent: 1.0\n",
      "cell Type: CD8_T_Cell   cellTypeMarkerNum: 2   markersNum: 2   percent: 1.0\n",
      "cell Type: CLP   cellTypeMarkerNum: 8   markersNum: 6   percent: 0.75\n",
      "cell Type: HSC   cellTypeMarkerNum: 9   markersNum: 5   percent: 0.5556\n",
      "cell Type: GATA1neg_Mks   cellTypeMarkerNum: 3   markersNum: 2   percent: 0.6667\n",
      "cell Type: GATA1pos_Mks   cellTypeMarkerNum: 3   markersNum: 1   percent: 0.3333\n",
      "cell Type: SEC   cellTypeMarkerNum: 3   markersNum: 1   percent: 0.3333\n",
      "cell Type: pDC   cellTypeMarkerNum: 2   markersNum: 1   percent: 0.5\n",
      "cell Type: SPINK2_HSPC   cellTypeMarkerNum: 7   markersNum: 4   percent: 0.5714\n",
      "cell Type: HSPC   cellTypeMarkerNum: 7   markersNum: 5   percent: 0.7143\n",
      "cell Type: GMP   cellTypeMarkerNum: 9   markersNum: 4   percent: 0.4444\n",
      "cell Type: GMP_Myeloblast   cellTypeMarkerNum: 7   markersNum: 4   percent: 0.5714\n",
      "cell Type: MEP_Early_Erythroblast   cellTypeMarkerNum: 7   markersNum: 4   percent: 0.5714\n",
      "cell Type: CD34_CD61   cellTypeMarkerNum: 7   markersNum: 4   percent: 0.5714\n",
      "cell Type: B_Cells   cellTypeMarkerNum: 3   markersNum: 1   percent: 0.3333\n",
      "cell Type: VSMC   cellTypeMarkerNum: 2   markersNum: 1   percent: 0.5\n",
      "clusterType: 18   完全匹配数量: 2    细胞类型: CD4_T_Cell,CD8_T_Cell\n"
     ]
    }
   ],
   "source": [
    "clusterType = 18\n",
    "get_annotation2(clusterType)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "id": "3442c440-6ce5-4984-b317-c854ec18ab42",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1 marker: SDC1     cell type: Plasma_Cells    cellTypeMarkerNum: 3    state: +     FC: 2.39052     pct.1: 0.79     pct.2: 0.136    p_val: 1.92754551064776e-174\n",
      "2 marker: SDC1     cell type: Adipocyte    cellTypeMarkerNum: 2    state: +     FC: 2.39052     pct.1: 0.79     pct.2: 0.136    p_val: 1.92754551064776e-174\n",
      "3 marker: CD38     cell type: GMP    cellTypeMarkerNum: 9    state: +     FC: 1.68066     pct.1: 0.946     pct.2: 0.301    p_val: 1.05603544619442e-150\n",
      "4 marker: CD38     cell type: Plasma_Cells    cellTypeMarkerNum: 3    state: +++     FC: 1.68066     pct.1: 0.946     pct.2: 0.301    p_val: 1.05603544619442e-150\n",
      "5 marker: CD79A     cell type: Immature_B_Cell    cellTypeMarkerNum: 3    state: +     FC: 0.95069     pct.1: 0.728     pct.2: 0.185    p_val: 6.41621419277449e-93\n",
      "6 marker: CD79A     cell type: B_Cells    cellTypeMarkerNum: 3    state: +     FC: 0.95069     pct.1: 0.728     pct.2: 0.185    p_val: 6.41621419277449e-93\n",
      "7 marker: CD79A     cell type: Plasma_Cells    cellTypeMarkerNum: 3    state: +     FC: 0.95069     pct.1: 0.728     pct.2: 0.185    p_val: 6.41621419277449e-93\n",
      "8 marker: PTPRC     cell type: HSC    cellTypeMarkerNum: 9    state: -     FC: -6.41387     pct.1: 0.061     pct.2: 0.403    p_val: 7.19315908716825e-83\n",
      "9 marker: PTPRC     cell type: GMP    cellTypeMarkerNum: 9    state: -     FC: -6.41387     pct.1: 0.061     pct.2: 0.403    p_val: 7.19315908716825e-83\n",
      "10 marker: CD34     cell type: pDC    cellTypeMarkerNum: 2    state: -     FC: -6.8449     pct.1: 0.007     pct.2: 0.176    p_val: 5.711216559574439e-40\n",
      "11 marker: CXCL12     cell type: SEC    cellTypeMarkerNum: 3    state: -     FC: -6.80677     pct.1: 0.164     pct.2: 0.347    p_val: 7.70670146820071e-38\n",
      "12 marker: CD14     cell type: HSC    cellTypeMarkerNum: 9    state: -     FC: -6.15743     pct.1: 0.006     pct.2: 0.069    p_val: 9.86765372924433e-17\n",
      "13 marker: CD14     cell type: SPINK2_HSPC    cellTypeMarkerNum: 7    state: -     FC: -6.15743     pct.1: 0.006     pct.2: 0.069    p_val: 9.86765372924433e-17\n",
      "14 marker: CD14     cell type: HSPC    cellTypeMarkerNum: 7    state: -     FC: -6.15743     pct.1: 0.006     pct.2: 0.069    p_val: 9.86765372924433e-17\n",
      "15 marker: CD14     cell type: GMP    cellTypeMarkerNum: 9    state: -     FC: -6.15743     pct.1: 0.006     pct.2: 0.069    p_val: 9.86765372924433e-17\n",
      "16 marker: CD14     cell type: GMP_Myeloblast    cellTypeMarkerNum: 7    state: -     FC: -6.15743     pct.1: 0.006     pct.2: 0.069    p_val: 9.86765372924433e-17\n",
      "17 marker: CD14     cell type: CLP    cellTypeMarkerNum: 8    state: -     FC: -6.15743     pct.1: 0.006     pct.2: 0.069    p_val: 9.86765372924433e-17\n",
      "18 marker: CD14     cell type: MEP_Early_Erythroblast    cellTypeMarkerNum: 7    state: -     FC: -6.15743     pct.1: 0.006     pct.2: 0.069    p_val: 9.86765372924433e-17\n",
      "19 marker: CD14     cell type: CD34_CD61    cellTypeMarkerNum: 7    state: -     FC: -6.15743     pct.1: 0.006     pct.2: 0.069    p_val: 9.86765372924433e-17\n",
      "20 marker: CDH5     cell type: VSMC    cellTypeMarkerNum: 2    state: -     FC: -6.99487     pct.1: 0.004     pct.2: 0.071    p_val: 4.5084828116223495e-12\n",
      "21 marker: GATA1     cell type: GATA1neg_Mks    cellTypeMarkerNum: 3    state: -     FC: -5.76521     pct.1: 0.005     pct.2: 0.054    p_val: 1.04172967715839e-11\n",
      "22 marker: CD3E     cell type: HSC    cellTypeMarkerNum: 9    state: -     FC: -7.3956     pct.1: 0.003     pct.2: 0.057    p_val: 3.1387628847351795e-11\n",
      "23 marker: CD3E     cell type: SPINK2_HSPC    cellTypeMarkerNum: 7    state: -     FC: -7.3956     pct.1: 0.003     pct.2: 0.057    p_val: 3.1387628847351795e-11\n",
      "24 marker: CD3E     cell type: HSPC    cellTypeMarkerNum: 7    state: -     FC: -7.3956     pct.1: 0.003     pct.2: 0.057    p_val: 3.1387628847351795e-11\n",
      "25 marker: CD3E     cell type: GMP    cellTypeMarkerNum: 9    state: -     FC: -7.3956     pct.1: 0.003     pct.2: 0.057    p_val: 3.1387628847351795e-11\n",
      "26 marker: CD3E     cell type: GMP_Myeloblast    cellTypeMarkerNum: 7    state: -     FC: -7.3956     pct.1: 0.003     pct.2: 0.057    p_val: 3.1387628847351795e-11\n",
      "27 marker: CD3E     cell type: CLP    cellTypeMarkerNum: 8    state: -     FC: -7.3956     pct.1: 0.003     pct.2: 0.057    p_val: 3.1387628847351795e-11\n",
      "28 marker: CD3E     cell type: MEP_Early_Erythroblast    cellTypeMarkerNum: 7    state: -     FC: -7.3956     pct.1: 0.003     pct.2: 0.057    p_val: 3.1387628847351795e-11\n",
      "29 marker: CD3E     cell type: CD34_CD61    cellTypeMarkerNum: 7    state: -     FC: -7.3956     pct.1: 0.003     pct.2: 0.057    p_val: 3.1387628847351795e-11\n",
      "30 marker: CEACAM8     cell type: HSC    cellTypeMarkerNum: 9    state: -     FC: -7.06098     pct.1: 0.003     pct.2: 0.034    p_val: 2.89635051243345e-05\n",
      "31 marker: CEACAM8     cell type: SPINK2_HSPC    cellTypeMarkerNum: 7    state: -     FC: -7.06098     pct.1: 0.003     pct.2: 0.034    p_val: 2.89635051243345e-05\n",
      "32 marker: CEACAM8     cell type: HSPC    cellTypeMarkerNum: 7    state: -     FC: -7.06098     pct.1: 0.003     pct.2: 0.034    p_val: 2.89635051243345e-05\n",
      "33 marker: CEACAM8     cell type: GMP    cellTypeMarkerNum: 9    state: -     FC: -7.06098     pct.1: 0.003     pct.2: 0.034    p_val: 2.89635051243345e-05\n",
      "34 marker: CEACAM8     cell type: GMP_Myeloblast    cellTypeMarkerNum: 7    state: -     FC: -7.06098     pct.1: 0.003     pct.2: 0.034    p_val: 2.89635051243345e-05\n",
      "35 marker: CEACAM8     cell type: CLP    cellTypeMarkerNum: 8    state: -     FC: -7.06098     pct.1: 0.003     pct.2: 0.034    p_val: 2.89635051243345e-05\n",
      "36 marker: CEACAM8     cell type: MEP_Early_Erythroblast    cellTypeMarkerNum: 7    state: -     FC: -7.06098     pct.1: 0.003     pct.2: 0.034    p_val: 2.89635051243345e-05\n",
      "37 marker: CEACAM8     cell type: CD34_CD61    cellTypeMarkerNum: 7    state: -     FC: -7.06098     pct.1: 0.003     pct.2: 0.034    p_val: 2.89635051243345e-05\n",
      "38 marker: PAX5     cell type: B_Cells    cellTypeMarkerNum: 3    state: -     FC: -3.24593     pct.1: 0.042     pct.2: 0.069    p_val: 0.0002640822992166\n",
      "cell Type: Plasma_Cells   cellTypeMarkerNum: 3   markersNum: 3   percent: 1.0\n",
      "cell Type: Adipocyte   cellTypeMarkerNum: 2   markersNum: 1   percent: 0.5\n",
      "cell Type: GMP   cellTypeMarkerNum: 9   markersNum: 5   percent: 0.5556\n",
      "cell Type: Immature_B_Cell   cellTypeMarkerNum: 3   markersNum: 1   percent: 0.3333\n",
      "cell Type: B_Cells   cellTypeMarkerNum: 3   markersNum: 2   percent: 0.6667\n",
      "cell Type: HSC   cellTypeMarkerNum: 9   markersNum: 4   percent: 0.4444\n",
      "cell Type: pDC   cellTypeMarkerNum: 2   markersNum: 1   percent: 0.5\n",
      "cell Type: SEC   cellTypeMarkerNum: 3   markersNum: 1   percent: 0.3333\n",
      "cell Type: SPINK2_HSPC   cellTypeMarkerNum: 7   markersNum: 3   percent: 0.4286\n",
      "cell Type: HSPC   cellTypeMarkerNum: 7   markersNum: 3   percent: 0.4286\n",
      "cell Type: GMP_Myeloblast   cellTypeMarkerNum: 7   markersNum: 3   percent: 0.4286\n",
      "cell Type: CLP   cellTypeMarkerNum: 8   markersNum: 3   percent: 0.375\n",
      "cell Type: MEP_Early_Erythroblast   cellTypeMarkerNum: 7   markersNum: 3   percent: 0.4286\n",
      "cell Type: CD34_CD61   cellTypeMarkerNum: 7   markersNum: 3   percent: 0.4286\n",
      "cell Type: VSMC   cellTypeMarkerNum: 2   markersNum: 1   percent: 0.5\n",
      "cell Type: GATA1neg_Mks   cellTypeMarkerNum: 3   markersNum: 1   percent: 0.3333\n",
      "clusterType: 0   完全匹配数量: 1    细胞类型: Plasma_Cells\n"
     ]
    }
   ],
   "source": [
    "clusterType = 0\n",
    "get_annotation2(clusterType)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "id": "7b759007-6911-47a7-9c00-21d533175977",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1 marker: VIM     cell type: Endosteal    cellTypeMarkerNum: 3    state: +     FC: 1.63361     pct.1: 0.988     pct.2: 0.907    p_val: 3.70433109866415e-159\n",
      "2 marker: CXCL12     cell type: Adipo_MSC    cellTypeMarkerNum: 3    state: +     FC: 3.88112     pct.1: 0.794     pct.2: 0.326    p_val: 1.19965431911325e-138\n",
      "3 marker: CXCL12     cell type: THY1_MSC    cellTypeMarkerNum: 3    state: +     FC: 3.88112     pct.1: 0.794     pct.2: 0.326    p_val: 1.19965431911325e-138\n",
      "4 marker: CXCL12     cell type: AEC    cellTypeMarkerNum: 2    state: +     FC: 3.88112     pct.1: 0.794     pct.2: 0.326    p_val: 1.19965431911325e-138\n",
      "5 marker: PTPRC     cell type: HSC    cellTypeMarkerNum: 9    state: -     FC: -4.74359     pct.1: 0.021     pct.2: 0.382    p_val: 6.21910115934506e-81\n",
      "6 marker: PTPRC     cell type: GMP    cellTypeMarkerNum: 9    state: -     FC: -4.74359     pct.1: 0.021     pct.2: 0.382    p_val: 6.21910115934506e-81\n",
      "7 marker: CD38     cell type: HSC    cellTypeMarkerNum: 9    state: -     FC: -3.1573     pct.1: 0.028     pct.2: 0.355    p_val: 1.05758128739101e-76\n",
      "8 marker: CD38     cell type: CLP    cellTypeMarkerNum: 8    state: -     FC: -3.1573     pct.1: 0.028     pct.2: 0.355    p_val: 1.05758128739101e-76\n",
      "9 marker: THY1     cell type: HSC    cellTypeMarkerNum: 9    state: +     FC: 3.69375     pct.1: 0.356     pct.2: 0.107    p_val: 7.38249298638704e-40\n",
      "10 marker: THY1     cell type: GMP    cellTypeMarkerNum: 9    state: +     FC: 3.69375     pct.1: 0.356     pct.2: 0.107    p_val: 7.38249298638704e-40\n",
      "11 marker: THY1     cell type: Adipo_MSC    cellTypeMarkerNum: 3    state: lo     FC: 3.69375     pct.1: 0.356     pct.2: 0.107    p_val: 7.38249298638704e-40\n",
      "12 marker: THY1     cell type: THY1_MSC    cellTypeMarkerNum: 3    state: hi     FC: 3.69375     pct.1: 0.356     pct.2: 0.107    p_val: 7.38249298638704e-40\n",
      "13 marker: VCAM1     cell type: Macrophages    cellTypeMarkerNum: 3    state: +     FC: 2.58707     pct.1: 0.44     pct.2: 0.195    p_val: 9.38597121271134e-32\n",
      "14 marker: CD34     cell type: pDC    cellTypeMarkerNum: 2    state: -     FC: -2.25134     pct.1: 0.017     pct.2: 0.165    p_val: 7.413628341409891e-31\n",
      "15 marker: SPINK2     cell type: HSPC    cellTypeMarkerNum: 7    state: -     FC: -2.02468     pct.1: 0.017     pct.2: 0.146    p_val: 1.31973048465337e-26\n",
      "16 marker: PAX5     cell type: B_Cells    cellTypeMarkerNum: 3    state: -     FC: -3.15782     pct.1: 0.006     pct.2: 0.068    p_val: 2.18007565164845e-13\n",
      "17 marker: CD3E     cell type: HSC    cellTypeMarkerNum: 9    state: -     FC: -5.13822     pct.1: 0.001     pct.2: 0.054    p_val: 8.15939817793272e-13\n",
      "18 marker: CD3E     cell type: SPINK2_HSPC    cellTypeMarkerNum: 7    state: -     FC: -5.13822     pct.1: 0.001     pct.2: 0.054    p_val: 8.15939817793272e-13\n",
      "19 marker: CD3E     cell type: HSPC    cellTypeMarkerNum: 7    state: -     FC: -5.13822     pct.1: 0.001     pct.2: 0.054    p_val: 8.15939817793272e-13\n",
      "20 marker: CD3E     cell type: GMP    cellTypeMarkerNum: 9    state: -     FC: -5.13822     pct.1: 0.001     pct.2: 0.054    p_val: 8.15939817793272e-13\n",
      "21 marker: CD3E     cell type: GMP_Myeloblast    cellTypeMarkerNum: 7    state: -     FC: -5.13822     pct.1: 0.001     pct.2: 0.054    p_val: 8.15939817793272e-13\n",
      "22 marker: CD3E     cell type: CLP    cellTypeMarkerNum: 8    state: -     FC: -5.13822     pct.1: 0.001     pct.2: 0.054    p_val: 8.15939817793272e-13\n",
      "23 marker: CD3E     cell type: MEP_Early_Erythroblast    cellTypeMarkerNum: 7    state: -     FC: -5.13822     pct.1: 0.001     pct.2: 0.054    p_val: 8.15939817793272e-13\n",
      "24 marker: CD3E     cell type: CD34_CD61    cellTypeMarkerNum: 7    state: -     FC: -5.13822     pct.1: 0.001     pct.2: 0.054    p_val: 8.15939817793272e-13\n",
      "25 marker: CDH5     cell type: VSMC    cellTypeMarkerNum: 2    state: -     FC: -2.40868     pct.1: 0.009     pct.2: 0.067    p_val: 1.0851815970984499e-12\n",
      "26 marker: SDC1     cell type: Plasma_Cells    cellTypeMarkerNum: 3    state: +     FC: 0.66237     pct.1: 0.079     pct.2: 0.187    p_val: 2.4631805629632797e-11\n",
      "27 marker: SDC1     cell type: Adipocyte    cellTypeMarkerNum: 2    state: +     FC: 0.66237     pct.1: 0.079     pct.2: 0.187    p_val: 2.4631805629632797e-11\n",
      "28 marker: GATA1     cell type: GATA1neg_Mks    cellTypeMarkerNum: 3    state: -     FC: -3.40081     pct.1: 0.004     pct.2: 0.051    p_val: 3.0361079658195796e-11\n",
      "29 marker: NCAM1     cell type: HSC    cellTypeMarkerNum: 9    state: -     FC: -1.24419     pct.1: 0.082     pct.2: 0.163    p_val: 1.76307765899512e-08\n",
      "30 marker: NCAM1     cell type: SPINK2_HSPC    cellTypeMarkerNum: 7    state: -     FC: -1.24419     pct.1: 0.082     pct.2: 0.163    p_val: 1.76307765899512e-08\n",
      "31 marker: NCAM1     cell type: HSPC    cellTypeMarkerNum: 7    state: -     FC: -1.24419     pct.1: 0.082     pct.2: 0.163    p_val: 1.76307765899512e-08\n",
      "32 marker: NCAM1     cell type: GMP    cellTypeMarkerNum: 9    state: -     FC: -1.24419     pct.1: 0.082     pct.2: 0.163    p_val: 1.76307765899512e-08\n",
      "33 marker: NCAM1     cell type: GMP_Myeloblast    cellTypeMarkerNum: 7    state: -     FC: -1.24419     pct.1: 0.082     pct.2: 0.163    p_val: 1.76307765899512e-08\n",
      "34 marker: NCAM1     cell type: CLP    cellTypeMarkerNum: 8    state: -     FC: -1.24419     pct.1: 0.082     pct.2: 0.163    p_val: 1.76307765899512e-08\n",
      "35 marker: NCAM1     cell type: MEP_Early_Erythroblast    cellTypeMarkerNum: 7    state: -     FC: -1.24419     pct.1: 0.082     pct.2: 0.163    p_val: 1.76307765899512e-08\n",
      "36 marker: NCAM1     cell type: CD34_CD61    cellTypeMarkerNum: 7    state: -     FC: -1.24419     pct.1: 0.082     pct.2: 0.163    p_val: 1.76307765899512e-08\n",
      "37 marker: CEACAM8     cell type: HSC    cellTypeMarkerNum: 9    state: -     FC: -3.65797     pct.1: 0.003     pct.2: 0.032    p_val: 1.62750006908948e-07\n",
      "38 marker: CEACAM8     cell type: SPINK2_HSPC    cellTypeMarkerNum: 7    state: -     FC: -3.65797     pct.1: 0.003     pct.2: 0.032    p_val: 1.62750006908948e-07\n",
      "39 marker: CEACAM8     cell type: HSPC    cellTypeMarkerNum: 7    state: -     FC: -3.65797     pct.1: 0.003     pct.2: 0.032    p_val: 1.62750006908948e-07\n",
      "40 marker: CEACAM8     cell type: GMP    cellTypeMarkerNum: 9    state: -     FC: -3.65797     pct.1: 0.003     pct.2: 0.032    p_val: 1.62750006908948e-07\n",
      "41 marker: CEACAM8     cell type: GMP_Myeloblast    cellTypeMarkerNum: 7    state: -     FC: -3.65797     pct.1: 0.003     pct.2: 0.032    p_val: 1.62750006908948e-07\n",
      "42 marker: CEACAM8     cell type: CLP    cellTypeMarkerNum: 8    state: -     FC: -3.65797     pct.1: 0.003     pct.2: 0.032    p_val: 1.62750006908948e-07\n",
      "43 marker: CEACAM8     cell type: MEP_Early_Erythroblast    cellTypeMarkerNum: 7    state: -     FC: -3.65797     pct.1: 0.003     pct.2: 0.032    p_val: 1.62750006908948e-07\n",
      "44 marker: CEACAM8     cell type: CD34_CD61    cellTypeMarkerNum: 7    state: -     FC: -3.65797     pct.1: 0.003     pct.2: 0.032    p_val: 1.62750006908948e-07\n",
      "45 marker: CD19     cell type: HSC    cellTypeMarkerNum: 9    state: -     FC: -0.84135     pct.1: 0.004     pct.2: 0.037    p_val: 7.26382951026835e-06\n",
      "46 marker: CD19     cell type: SPINK2_HSPC    cellTypeMarkerNum: 7    state: -     FC: -0.84135     pct.1: 0.004     pct.2: 0.037    p_val: 7.26382951026835e-06\n",
      "47 marker: CD19     cell type: HSPC    cellTypeMarkerNum: 7    state: -     FC: -0.84135     pct.1: 0.004     pct.2: 0.037    p_val: 7.26382951026835e-06\n",
      "48 marker: CD19     cell type: GMP    cellTypeMarkerNum: 9    state: -     FC: -0.84135     pct.1: 0.004     pct.2: 0.037    p_val: 7.26382951026835e-06\n",
      "49 marker: CD19     cell type: GMP_Myeloblast    cellTypeMarkerNum: 7    state: -     FC: -0.84135     pct.1: 0.004     pct.2: 0.037    p_val: 7.26382951026835e-06\n",
      "50 marker: CD19     cell type: CLP    cellTypeMarkerNum: 8    state: -     FC: -0.84135     pct.1: 0.004     pct.2: 0.037    p_val: 7.26382951026835e-06\n",
      "51 marker: CD19     cell type: MEP_Early_Erythroblast    cellTypeMarkerNum: 7    state: -     FC: -0.84135     pct.1: 0.004     pct.2: 0.037    p_val: 7.26382951026835e-06\n",
      "52 marker: CD19     cell type: CD34_CD61    cellTypeMarkerNum: 7    state: -     FC: -0.84135     pct.1: 0.004     pct.2: 0.037    p_val: 7.26382951026835e-06\n",
      "53 marker: ACTA2     cell type: VSMC    cellTypeMarkerNum: 2    state: +     FC: 1.17194     pct.1: 0.19     pct.2: 0.153    p_val: 8.4737241869599e-05\n",
      "54 marker: CD14     cell type: HSC    cellTypeMarkerNum: 9    state: -     FC: -1.0512     pct.1: 0.022     pct.2: 0.065    p_val: 0.0001869546391092\n",
      "55 marker: CD14     cell type: SPINK2_HSPC    cellTypeMarkerNum: 7    state: -     FC: -1.0512     pct.1: 0.022     pct.2: 0.065    p_val: 0.0001869546391092\n",
      "56 marker: CD14     cell type: HSPC    cellTypeMarkerNum: 7    state: -     FC: -1.0512     pct.1: 0.022     pct.2: 0.065    p_val: 0.0001869546391092\n",
      "57 marker: CD14     cell type: GMP    cellTypeMarkerNum: 9    state: -     FC: -1.0512     pct.1: 0.022     pct.2: 0.065    p_val: 0.0001869546391092\n",
      "58 marker: CD14     cell type: GMP_Myeloblast    cellTypeMarkerNum: 7    state: -     FC: -1.0512     pct.1: 0.022     pct.2: 0.065    p_val: 0.0001869546391092\n",
      "59 marker: CD14     cell type: CLP    cellTypeMarkerNum: 8    state: -     FC: -1.0512     pct.1: 0.022     pct.2: 0.065    p_val: 0.0001869546391092\n",
      "60 marker: CD14     cell type: MEP_Early_Erythroblast    cellTypeMarkerNum: 7    state: -     FC: -1.0512     pct.1: 0.022     pct.2: 0.065    p_val: 0.0001869546391092\n",
      "61 marker: CD14     cell type: CD34_CD61    cellTypeMarkerNum: 7    state: -     FC: -1.0512     pct.1: 0.022     pct.2: 0.065    p_val: 0.0001869546391092\n",
      "62 marker: FOXC1     cell type: Adipo_MSC    cellTypeMarkerNum: 3    state: +     FC: 1.35328     pct.1: 0.301     pct.2: 0.276    p_val: 0.0028210377757403\n",
      "63 marker: FOXC1     cell type: THY1_MSC    cellTypeMarkerNum: 3    state: +     FC: 1.35328     pct.1: 0.301     pct.2: 0.276    p_val: 0.0028210377757403\n",
      "64 marker: ITGB3     cell type: CD34_CD61    cellTypeMarkerNum: 7    state: +     FC: 0.28318     pct.1: 0.004     pct.2: 0.014    p_val: 0.007212538797928\n",
      "65 marker: ITGB3     cell type: GATA1neg_Mks    cellTypeMarkerNum: 3    state: +     FC: 0.28318     pct.1: 0.004     pct.2: 0.014    p_val: 0.007212538797928\n",
      "66 marker: ITGB3     cell type: GATA1pos_Mks    cellTypeMarkerNum: 3    state: +     FC: 0.28318     pct.1: 0.004     pct.2: 0.014    p_val: 0.007212538797928\n",
      "cell Type: Endosteal   cellTypeMarkerNum: 3   markersNum: 1   percent: 0.3333\n",
      "cell Type: Adipo_MSC   cellTypeMarkerNum: 3   markersNum: 3   percent: 1.0\n",
      "cell Type: THY1_MSC   cellTypeMarkerNum: 3   markersNum: 3   percent: 1.0\n",
      "cell Type: AEC   cellTypeMarkerNum: 2   markersNum: 1   percent: 0.5\n",
      "cell Type: HSC   cellTypeMarkerNum: 9   markersNum: 8   percent: 0.8889\n",
      "cell Type: GMP   cellTypeMarkerNum: 9   markersNum: 7   percent: 0.7778\n",
      "cell Type: CLP   cellTypeMarkerNum: 8   markersNum: 6   percent: 0.75\n",
      "cell Type: Macrophages   cellTypeMarkerNum: 3   markersNum: 1   percent: 0.3333\n",
      "cell Type: pDC   cellTypeMarkerNum: 2   markersNum: 1   percent: 0.5\n",
      "cell Type: HSPC   cellTypeMarkerNum: 7   markersNum: 6   percent: 0.8571\n",
      "cell Type: B_Cells   cellTypeMarkerNum: 3   markersNum: 1   percent: 0.3333\n",
      "cell Type: SPINK2_HSPC   cellTypeMarkerNum: 7   markersNum: 5   percent: 0.7143\n",
      "cell Type: GMP_Myeloblast   cellTypeMarkerNum: 7   markersNum: 5   percent: 0.7143\n",
      "cell Type: MEP_Early_Erythroblast   cellTypeMarkerNum: 7   markersNum: 5   percent: 0.7143\n",
      "cell Type: CD34_CD61   cellTypeMarkerNum: 7   markersNum: 6   percent: 0.8571\n",
      "cell Type: VSMC   cellTypeMarkerNum: 2   markersNum: 2   percent: 1.0\n",
      "cell Type: Plasma_Cells   cellTypeMarkerNum: 3   markersNum: 1   percent: 0.3333\n",
      "cell Type: Adipocyte   cellTypeMarkerNum: 2   markersNum: 1   percent: 0.5\n",
      "cell Type: GATA1neg_Mks   cellTypeMarkerNum: 3   markersNum: 2   percent: 0.6667\n",
      "cell Type: GATA1pos_Mks   cellTypeMarkerNum: 3   markersNum: 1   percent: 0.3333\n",
      "clusterType: 25   完全匹配数量: 3    细胞类型: Adipo_MSC,THY1_MSC,VSMC\n"
     ]
    }
   ],
   "source": [
    "clusterType = 25\n",
    "get_annotation2(clusterType)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "id": "400d83c5-152d-40e2-b46d-0f545590cd6b",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1 marker: VIM     cell type: Endosteal    cellTypeMarkerNum: 3    state: +     FC: 1.52765     pct.1: 0.995     pct.2: 0.906    p_val: 3.15919079561871e-179\n",
      "2 marker: FOXC1     cell type: Adipo_MSC    cellTypeMarkerNum: 3    state: +     FC: 1.68017     pct.1: 0.77     pct.2: 0.266    p_val: 5.637466214390351e-110\n",
      "3 marker: FOXC1     cell type: THY1_MSC    cellTypeMarkerNum: 3    state: +     FC: 1.68017     pct.1: 0.77     pct.2: 0.266    p_val: 5.637466214390351e-110\n",
      "4 marker: PTPRC     cell type: HSC    cellTypeMarkerNum: 9    state: -     FC: -5.04462     pct.1: 0.04     pct.2: 0.383    p_val: 3.51433755312244e-84\n",
      "5 marker: PTPRC     cell type: GMP    cellTypeMarkerNum: 9    state: -     FC: -5.04462     pct.1: 0.04     pct.2: 0.383    p_val: 3.51433755312244e-84\n",
      "6 marker: CD38     cell type: HSC    cellTypeMarkerNum: 9    state: -     FC: -4.04613     pct.1: 0.035     pct.2: 0.357    p_val: 4.84000281285577e-66\n",
      "7 marker: CD38     cell type: CLP    cellTypeMarkerNum: 8    state: -     FC: -4.04613     pct.1: 0.035     pct.2: 0.357    p_val: 4.84000281285577e-66\n",
      "8 marker: NCAM1     cell type: Endosteal    cellTypeMarkerNum: 3    state: +     FC: 0.44055     pct.1: 0.466     pct.2: 0.155    p_val: 1.10125364208222e-41\n",
      "9 marker: VCAM1     cell type: Macrophages    cellTypeMarkerNum: 3    state: +     FC: 0.57153     pct.1: 0.484     pct.2: 0.193    p_val: 1.1998440768657401e-31\n",
      "10 marker: SPINK2     cell type: HSPC    cellTypeMarkerNum: 7    state: -     FC: -4.61783     pct.1: 0.009     pct.2: 0.147    p_val: 1.2230750888651898e-30\n",
      "11 marker: CXCL12     cell type: SEC    cellTypeMarkerNum: 3    state: -     FC: -1.22514     pct.1: 0.643     pct.2: 0.327    p_val: 2.31794378199787e-27\n",
      "12 marker: NGFR     cell type: Schwann_Cell    cellTypeMarkerNum: 2    state: +     FC: 1.66414     pct.1: 0.248     pct.2: 0.086    p_val: 8.316101224562061e-23\n",
      "13 marker: CD34     cell type: pDC    cellTypeMarkerNum: 2    state: -     FC: -3.05718     pct.1: 0.03     pct.2: 0.166    p_val: 2.0086755912256502e-20\n",
      "14 marker: THY1     cell type: HSC    cellTypeMarkerNum: 9    state: +     FC: 0.74145     pct.1: 0.268     pct.2: 0.107    p_val: 1.22855821660781e-17\n",
      "15 marker: THY1     cell type: GMP    cellTypeMarkerNum: 9    state: +     FC: 0.74145     pct.1: 0.268     pct.2: 0.107    p_val: 1.22855821660781e-17\n",
      "16 marker: THY1     cell type: Adipo_MSC    cellTypeMarkerNum: 3    state: lo     FC: 0.74145     pct.1: 0.268     pct.2: 0.107    p_val: 1.22855821660781e-17\n",
      "17 marker: THY1     cell type: THY1_MSC    cellTypeMarkerNum: 3    state: hi     FC: 0.74145     pct.1: 0.268     pct.2: 0.107    p_val: 1.22855821660781e-17\n",
      "18 marker: GATA1     cell type: GATA1neg_Mks    cellTypeMarkerNum: 3    state: -     FC: -4.8783     pct.1: 0.003     pct.2: 0.051    p_val: 4.01826468517298e-12\n",
      "19 marker: PAX5     cell type: B_Cells    cellTypeMarkerNum: 3    state: -     FC: -5.07849     pct.1: 0.006     pct.2: 0.068    p_val: 1.28586091639856e-10\n",
      "20 marker: CDH5     cell type: VSMC    cellTypeMarkerNum: 2    state: -     FC: -3.3412     pct.1: 0.014     pct.2: 0.067    p_val: 1.8462198773961902e-10\n",
      "21 marker: CD3E     cell type: HSC    cellTypeMarkerNum: 9    state: -     FC: -3.68106     pct.1: 0.01     pct.2: 0.054    p_val: 1.45045939797487e-08\n",
      "22 marker: CD3E     cell type: SPINK2_HSPC    cellTypeMarkerNum: 7    state: -     FC: -3.68106     pct.1: 0.01     pct.2: 0.054    p_val: 1.45045939797487e-08\n",
      "23 marker: CD3E     cell type: HSPC    cellTypeMarkerNum: 7    state: -     FC: -3.68106     pct.1: 0.01     pct.2: 0.054    p_val: 1.45045939797487e-08\n",
      "24 marker: CD3E     cell type: GMP    cellTypeMarkerNum: 9    state: -     FC: -3.68106     pct.1: 0.01     pct.2: 0.054    p_val: 1.45045939797487e-08\n",
      "25 marker: CD3E     cell type: GMP_Myeloblast    cellTypeMarkerNum: 7    state: -     FC: -3.68106     pct.1: 0.01     pct.2: 0.054    p_val: 1.45045939797487e-08\n",
      "26 marker: CD3E     cell type: CLP    cellTypeMarkerNum: 8    state: -     FC: -3.68106     pct.1: 0.01     pct.2: 0.054    p_val: 1.45045939797487e-08\n",
      "27 marker: CD3E     cell type: MEP_Early_Erythroblast    cellTypeMarkerNum: 7    state: -     FC: -3.68106     pct.1: 0.01     pct.2: 0.054    p_val: 1.45045939797487e-08\n",
      "28 marker: CD3E     cell type: CD34_CD61    cellTypeMarkerNum: 7    state: -     FC: -3.68106     pct.1: 0.01     pct.2: 0.054    p_val: 1.45045939797487e-08\n",
      "29 marker: CD19     cell type: HSC    cellTypeMarkerNum: 9    state: -     FC: -3.86144     pct.1: 0.002     pct.2: 0.037    p_val: 9.1294824863306e-08\n",
      "30 marker: CD19     cell type: SPINK2_HSPC    cellTypeMarkerNum: 7    state: -     FC: -3.86144     pct.1: 0.002     pct.2: 0.037    p_val: 9.1294824863306e-08\n",
      "31 marker: CD19     cell type: HSPC    cellTypeMarkerNum: 7    state: -     FC: -3.86144     pct.1: 0.002     pct.2: 0.037    p_val: 9.1294824863306e-08\n",
      "32 marker: CD19     cell type: GMP    cellTypeMarkerNum: 9    state: -     FC: -3.86144     pct.1: 0.002     pct.2: 0.037    p_val: 9.1294824863306e-08\n",
      "33 marker: CD19     cell type: GMP_Myeloblast    cellTypeMarkerNum: 7    state: -     FC: -3.86144     pct.1: 0.002     pct.2: 0.037    p_val: 9.1294824863306e-08\n",
      "34 marker: CD19     cell type: CLP    cellTypeMarkerNum: 8    state: -     FC: -3.86144     pct.1: 0.002     pct.2: 0.037    p_val: 9.1294824863306e-08\n",
      "35 marker: CD19     cell type: MEP_Early_Erythroblast    cellTypeMarkerNum: 7    state: -     FC: -3.86144     pct.1: 0.002     pct.2: 0.037    p_val: 9.1294824863306e-08\n",
      "36 marker: CD19     cell type: CD34_CD61    cellTypeMarkerNum: 7    state: -     FC: -3.86144     pct.1: 0.002     pct.2: 0.037    p_val: 9.1294824863306e-08\n",
      "37 marker: CEACAM8     cell type: HSC    cellTypeMarkerNum: 9    state: -     FC: -5.34899     pct.1: 0.003     pct.2: 0.032    p_val: 9.97176183645324e-08\n",
      "38 marker: CEACAM8     cell type: SPINK2_HSPC    cellTypeMarkerNum: 7    state: -     FC: -5.34899     pct.1: 0.003     pct.2: 0.032    p_val: 9.97176183645324e-08\n",
      "39 marker: CEACAM8     cell type: HSPC    cellTypeMarkerNum: 7    state: -     FC: -5.34899     pct.1: 0.003     pct.2: 0.032    p_val: 9.97176183645324e-08\n",
      "40 marker: CEACAM8     cell type: GMP    cellTypeMarkerNum: 9    state: -     FC: -5.34899     pct.1: 0.003     pct.2: 0.032    p_val: 9.97176183645324e-08\n",
      "41 marker: CEACAM8     cell type: GMP_Myeloblast    cellTypeMarkerNum: 7    state: -     FC: -5.34899     pct.1: 0.003     pct.2: 0.032    p_val: 9.97176183645324e-08\n",
      "42 marker: CEACAM8     cell type: CLP    cellTypeMarkerNum: 8    state: -     FC: -5.34899     pct.1: 0.003     pct.2: 0.032    p_val: 9.97176183645324e-08\n",
      "43 marker: CEACAM8     cell type: MEP_Early_Erythroblast    cellTypeMarkerNum: 7    state: -     FC: -5.34899     pct.1: 0.003     pct.2: 0.032    p_val: 9.97176183645324e-08\n",
      "44 marker: CEACAM8     cell type: CD34_CD61    cellTypeMarkerNum: 7    state: -     FC: -5.34899     pct.1: 0.003     pct.2: 0.032    p_val: 9.97176183645324e-08\n",
      "45 marker: CD68     cell type: Macrophages    cellTypeMarkerNum: 3    state: +     FC: 0.75078     pct.1: 0.147     pct.2: 0.092    p_val: 4.55372717678983e-06\n",
      "46 marker: CD4     cell type: CD4_T_Cell    cellTypeMarkerNum: 2    state: +     FC: 0.60955     pct.1: 0.202     pct.2: 0.129    p_val: 5.99298404195102e-05\n",
      "cell Type: Endosteal   cellTypeMarkerNum: 3   markersNum: 2   percent: 0.6667\n",
      "cell Type: Adipo_MSC   cellTypeMarkerNum: 3   markersNum: 2   percent: 0.6667\n",
      "cell Type: THY1_MSC   cellTypeMarkerNum: 3   markersNum: 2   percent: 0.6667\n",
      "cell Type: HSC   cellTypeMarkerNum: 9   markersNum: 6   percent: 0.6667\n",
      "cell Type: GMP   cellTypeMarkerNum: 9   markersNum: 5   percent: 0.5556\n",
      "cell Type: CLP   cellTypeMarkerNum: 8   markersNum: 4   percent: 0.5\n",
      "cell Type: Macrophages   cellTypeMarkerNum: 3   markersNum: 2   percent: 0.6667\n",
      "cell Type: HSPC   cellTypeMarkerNum: 7   markersNum: 4   percent: 0.5714\n",
      "cell Type: SEC   cellTypeMarkerNum: 3   markersNum: 1   percent: 0.3333\n",
      "cell Type: Schwann_Cell   cellTypeMarkerNum: 2   markersNum: 1   percent: 0.5\n",
      "cell Type: pDC   cellTypeMarkerNum: 2   markersNum: 1   percent: 0.5\n",
      "cell Type: GATA1neg_Mks   cellTypeMarkerNum: 3   markersNum: 1   percent: 0.3333\n",
      "cell Type: B_Cells   cellTypeMarkerNum: 3   markersNum: 1   percent: 0.3333\n",
      "cell Type: VSMC   cellTypeMarkerNum: 2   markersNum: 1   percent: 0.5\n",
      "cell Type: SPINK2_HSPC   cellTypeMarkerNum: 7   markersNum: 3   percent: 0.4286\n",
      "cell Type: GMP_Myeloblast   cellTypeMarkerNum: 7   markersNum: 3   percent: 0.4286\n",
      "cell Type: MEP_Early_Erythroblast   cellTypeMarkerNum: 7   markersNum: 3   percent: 0.4286\n",
      "cell Type: CD34_CD61   cellTypeMarkerNum: 7   markersNum: 3   percent: 0.4286\n",
      "cell Type: CD4_T_Cell   cellTypeMarkerNum: 2   markersNum: 1   percent: 0.5\n",
      "clusterType: 20   完全匹配数量: 0    细胞类型: \n"
     ]
    }
   ],
   "source": [
    "clusterType = 20\n",
    "get_annotation2(clusterType)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "id": "c83d65a9-49e1-402f-9fdb-11456732a87d",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1 marker: MCAM     cell type: Adipocyte    cellTypeMarkerNum: 2    state: +     FC: 5.74447     pct.1: 0.975     pct.2: 0.083    p_val: 0.0\n",
      "2 marker: ACTA2     cell type: VSMC    cellTypeMarkerNum: 2    state: +     FC: 5.96625     pct.1: 0.983     pct.2: 0.123    p_val: 0.0\n",
      "3 marker: FOXC1     cell type: Adipo_MSC    cellTypeMarkerNum: 3    state: +     FC: 1.96544     pct.1: 0.856     pct.2: 0.256    p_val: 2.4367913483733103e-145\n",
      "4 marker: FOXC1     cell type: THY1_MSC    cellTypeMarkerNum: 3    state: +     FC: 1.96544     pct.1: 0.856     pct.2: 0.256    p_val: 2.4367913483733103e-145\n",
      "5 marker: CD38     cell type: HSC    cellTypeMarkerNum: 9    state: -     FC: -4.26651     pct.1: 0.029     pct.2: 0.362    p_val: 1.8675831863783999e-82\n",
      "6 marker: CD38     cell type: CLP    cellTypeMarkerNum: 8    state: -     FC: -4.26651     pct.1: 0.029     pct.2: 0.362    p_val: 1.8675831863783999e-82\n",
      "7 marker: PTPRC     cell type: HSC    cellTypeMarkerNum: 9    state: -     FC: -4.38151     pct.1: 0.054     pct.2: 0.388    p_val: 1.82605095618633e-71\n",
      "8 marker: PTPRC     cell type: GMP    cellTypeMarkerNum: 9    state: -     FC: -4.38151     pct.1: 0.054     pct.2: 0.388    p_val: 1.82605095618633e-71\n",
      "9 marker: VIM     cell type: Endosteal    cellTypeMarkerNum: 3    state: +     FC: 0.36938     pct.1: 0.993     pct.2: 0.905    p_val: 6.684579980032461e-35\n",
      "10 marker: NCAM1     cell type: HSC    cellTypeMarkerNum: 9    state: -     FC: -4.13612     pct.1: 0.022     pct.2: 0.167    p_val: 2.18300969712819e-29\n",
      "11 marker: NCAM1     cell type: SPINK2_HSPC    cellTypeMarkerNum: 7    state: -     FC: -4.13612     pct.1: 0.022     pct.2: 0.167    p_val: 2.18300969712819e-29\n",
      "12 marker: NCAM1     cell type: HSPC    cellTypeMarkerNum: 7    state: -     FC: -4.13612     pct.1: 0.022     pct.2: 0.167    p_val: 2.18300969712819e-29\n",
      "13 marker: NCAM1     cell type: GMP    cellTypeMarkerNum: 9    state: -     FC: -4.13612     pct.1: 0.022     pct.2: 0.167    p_val: 2.18300969712819e-29\n",
      "14 marker: NCAM1     cell type: GMP_Myeloblast    cellTypeMarkerNum: 7    state: -     FC: -4.13612     pct.1: 0.022     pct.2: 0.167    p_val: 2.18300969712819e-29\n",
      "15 marker: NCAM1     cell type: CLP    cellTypeMarkerNum: 8    state: -     FC: -4.13612     pct.1: 0.022     pct.2: 0.167    p_val: 2.18300969712819e-29\n",
      "16 marker: NCAM1     cell type: MEP_Early_Erythroblast    cellTypeMarkerNum: 7    state: -     FC: -4.13612     pct.1: 0.022     pct.2: 0.167    p_val: 2.18300969712819e-29\n",
      "17 marker: NCAM1     cell type: CD34_CD61    cellTypeMarkerNum: 7    state: -     FC: -4.13612     pct.1: 0.022     pct.2: 0.167    p_val: 2.18300969712819e-29\n",
      "18 marker: NGFR     cell type: Schwann_Cell    cellTypeMarkerNum: 2    state: +     FC: 1.24589     pct.1: 0.264     pct.2: 0.083    p_val: 3.4715410574092296e-28\n",
      "19 marker: CD4     cell type: CD4_T_Cell    cellTypeMarkerNum: 2    state: +     FC: 0.98118     pct.1: 0.318     pct.2: 0.124    p_val: 2.4680320619197e-25\n",
      "20 marker: CDH5     cell type: VSMC    cellTypeMarkerNum: 2    state: -     FC: -5.51059     pct.1: 0.005     pct.2: 0.068    p_val: 6.3302248793978e-20\n",
      "21 marker: PAX5     cell type: B_Cells    cellTypeMarkerNum: 3    state: -     FC: -3.63298     pct.1: 0.01     pct.2: 0.069    p_val: 1.7556397747914e-15\n",
      "22 marker: GATA1     cell type: GATA1neg_Mks    cellTypeMarkerNum: 3    state: -     FC: -5.33977     pct.1: 0.002     pct.2: 0.052    p_val: 3.03266971558838e-12\n",
      "23 marker: CD19     cell type: HSC    cellTypeMarkerNum: 9    state: -     FC: -3.67557     pct.1: 0.003     pct.2: 0.037    p_val: 4.49932046958459e-10\n",
      "24 marker: CD19     cell type: SPINK2_HSPC    cellTypeMarkerNum: 7    state: -     FC: -3.67557     pct.1: 0.003     pct.2: 0.037    p_val: 4.49932046958459e-10\n",
      "25 marker: CD19     cell type: HSPC    cellTypeMarkerNum: 7    state: -     FC: -3.67557     pct.1: 0.003     pct.2: 0.037    p_val: 4.49932046958459e-10\n",
      "26 marker: CD19     cell type: GMP    cellTypeMarkerNum: 9    state: -     FC: -3.67557     pct.1: 0.003     pct.2: 0.037    p_val: 4.49932046958459e-10\n",
      "27 marker: CD19     cell type: GMP_Myeloblast    cellTypeMarkerNum: 7    state: -     FC: -3.67557     pct.1: 0.003     pct.2: 0.037    p_val: 4.49932046958459e-10\n",
      "28 marker: CD19     cell type: CLP    cellTypeMarkerNum: 8    state: -     FC: -3.67557     pct.1: 0.003     pct.2: 0.037    p_val: 4.49932046958459e-10\n",
      "29 marker: CD19     cell type: MEP_Early_Erythroblast    cellTypeMarkerNum: 7    state: -     FC: -3.67557     pct.1: 0.003     pct.2: 0.037    p_val: 4.49932046958459e-10\n",
      "30 marker: CD19     cell type: CD34_CD61    cellTypeMarkerNum: 7    state: -     FC: -3.67557     pct.1: 0.003     pct.2: 0.037    p_val: 4.49932046958459e-10\n",
      "31 marker: CXCL12     cell type: SEC    cellTypeMarkerNum: 3    state: -     FC: -2.85718     pct.1: 0.518     pct.2: 0.326    p_val: 1.0153493943441e-07\n",
      "32 marker: SPINK2     cell type: HSPC    cellTypeMarkerNum: 7    state: -     FC: -2.09754     pct.1: 0.07     pct.2: 0.146    p_val: 3.00154821913662e-07\n",
      "33 marker: CD3E     cell type: HSC    cellTypeMarkerNum: 9    state: -     FC: -4.10804     pct.1: 0.007     pct.2: 0.055    p_val: 3.44275032411039e-07\n",
      "34 marker: CD3E     cell type: SPINK2_HSPC    cellTypeMarkerNum: 7    state: -     FC: -4.10804     pct.1: 0.007     pct.2: 0.055    p_val: 3.44275032411039e-07\n",
      "35 marker: CD3E     cell type: HSPC    cellTypeMarkerNum: 7    state: -     FC: -4.10804     pct.1: 0.007     pct.2: 0.055    p_val: 3.44275032411039e-07\n",
      "36 marker: CD3E     cell type: GMP    cellTypeMarkerNum: 9    state: -     FC: -4.10804     pct.1: 0.007     pct.2: 0.055    p_val: 3.44275032411039e-07\n",
      "37 marker: CD3E     cell type: GMP_Myeloblast    cellTypeMarkerNum: 7    state: -     FC: -4.10804     pct.1: 0.007     pct.2: 0.055    p_val: 3.44275032411039e-07\n",
      "38 marker: CD3E     cell type: CLP    cellTypeMarkerNum: 8    state: -     FC: -4.10804     pct.1: 0.007     pct.2: 0.055    p_val: 3.44275032411039e-07\n",
      "39 marker: CD3E     cell type: MEP_Early_Erythroblast    cellTypeMarkerNum: 7    state: -     FC: -4.10804     pct.1: 0.007     pct.2: 0.055    p_val: 3.44275032411039e-07\n",
      "40 marker: CD3E     cell type: CD34_CD61    cellTypeMarkerNum: 7    state: -     FC: -4.10804     pct.1: 0.007     pct.2: 0.055    p_val: 3.44275032411039e-07\n",
      "41 marker: CD14     cell type: Monocytes    cellTypeMarkerNum: 1    state: +     FC: 0.4305     pct.1: 0.11     pct.2: 0.063    p_val: 2.78190170676087e-06\n",
      "42 marker: CD14     cell type: Non_Classical_Monocyte    cellTypeMarkerNum: 3    state: lo     FC: 0.4305     pct.1: 0.11     pct.2: 0.063    p_val: 2.78190170676087e-06\n",
      "43 marker: ITGB3     cell type: CD34_CD61    cellTypeMarkerNum: 7    state: +     FC: 1.49087     pct.1: 0.042     pct.2: 0.013    p_val: 1.3553233849032e-05\n",
      "44 marker: ITGB3     cell type: GATA1neg_Mks    cellTypeMarkerNum: 3    state: +     FC: 1.49087     pct.1: 0.042     pct.2: 0.013    p_val: 1.3553233849032e-05\n",
      "45 marker: ITGB3     cell type: GATA1pos_Mks    cellTypeMarkerNum: 3    state: +     FC: 1.49087     pct.1: 0.042     pct.2: 0.013    p_val: 1.3553233849032e-05\n",
      "cell Type: Adipocyte   cellTypeMarkerNum: 2   markersNum: 1   percent: 0.5\n",
      "cell Type: VSMC   cellTypeMarkerNum: 2   markersNum: 2   percent: 1.0\n",
      "cell Type: Adipo_MSC   cellTypeMarkerNum: 3   markersNum: 1   percent: 0.3333\n",
      "cell Type: THY1_MSC   cellTypeMarkerNum: 3   markersNum: 1   percent: 0.3333\n",
      "cell Type: HSC   cellTypeMarkerNum: 9   markersNum: 5   percent: 0.5556\n",
      "cell Type: CLP   cellTypeMarkerNum: 8   markersNum: 4   percent: 0.5\n",
      "cell Type: GMP   cellTypeMarkerNum: 9   markersNum: 4   percent: 0.4444\n",
      "cell Type: Endosteal   cellTypeMarkerNum: 3   markersNum: 1   percent: 0.3333\n",
      "cell Type: SPINK2_HSPC   cellTypeMarkerNum: 7   markersNum: 3   percent: 0.4286\n",
      "cell Type: HSPC   cellTypeMarkerNum: 7   markersNum: 4   percent: 0.5714\n",
      "cell Type: GMP_Myeloblast   cellTypeMarkerNum: 7   markersNum: 3   percent: 0.4286\n",
      "cell Type: MEP_Early_Erythroblast   cellTypeMarkerNum: 7   markersNum: 3   percent: 0.4286\n",
      "cell Type: CD34_CD61   cellTypeMarkerNum: 7   markersNum: 4   percent: 0.5714\n",
      "cell Type: Schwann_Cell   cellTypeMarkerNum: 2   markersNum: 1   percent: 0.5\n",
      "cell Type: CD4_T_Cell   cellTypeMarkerNum: 2   markersNum: 1   percent: 0.5\n",
      "cell Type: B_Cells   cellTypeMarkerNum: 3   markersNum: 1   percent: 0.3333\n",
      "cell Type: GATA1neg_Mks   cellTypeMarkerNum: 3   markersNum: 2   percent: 0.6667\n",
      "cell Type: SEC   cellTypeMarkerNum: 3   markersNum: 1   percent: 0.3333\n",
      "cell Type: Monocytes   cellTypeMarkerNum: 1   markersNum: 1   percent: 1.0\n",
      "cell Type: Non_Classical_Monocyte   cellTypeMarkerNum: 3   markersNum: 1   percent: 0.3333\n",
      "cell Type: GATA1pos_Mks   cellTypeMarkerNum: 3   markersNum: 1   percent: 0.3333\n",
      "clusterType: 8   完全匹配数量: 2    细胞类型: VSMC,Monocytes\n"
     ]
    }
   ],
   "source": [
    "clusterType = 8\n",
    "get_annotation2(clusterType)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "karl",
   "language": "python",
   "name": "karl"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
